From 947e982ede0e65a5b319e1d2c007b6ae0106398d Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Mon, 28 Jul 2025 21:46:39 -0400
Subject: [PATCH 001/224] [Docs] Minimize spacing for supported_hardware.md
 table (#21779)

---
 .../quantization/supported_hardware.md        | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/docs/features/quantization/supported_hardware.md b/docs/features/quantization/supported_hardware.md
index 70a6a499562a3..f53e69ecc6115 100644
--- a/docs/features/quantization/supported_hardware.md
+++ b/docs/features/quantization/supported_hardware.md
@@ -2,19 +2,26 @@
 
 The table below shows the compatibility of various quantization implementations with different hardware platforms in vLLM:
 
+<style>
+th {
+  white-space: nowrap;
+  min-width: 0 !important;
+}
+</style>
+
 | Implementation        | Volta   | Turing   | Ampere   | Ada   | Hopper   | AMD GPU   | Intel GPU   | Intel Gaudi | x86 CPU   | AWS Neuron   | Google TPU   |
 |-----------------------|---------|----------|----------|-------|----------|-----------|-------------|-------------|-----------|--------------|--------------|
-| AWQ                   | ❌      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌        | ✅︎          | ❌         | ✅︎        | ❌           | ❌           |
-| GPTQ                  | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎        | ❌        | ✅︎          | ❌         | ✅︎        | ❌           | ❌           |
-| Marlin (GPTQ/AWQ/FP8) | ❌      | ❌      | ✅︎       | ✅︎    | ✅︎       | ❌        | ❌          | ❌         | ❌        | ❌          | ❌           |
-| INT8 (W8A8)           | ❌      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌        | ❌          | ❌         | ✅︎        | ✅︎           | ✅︎            |
-| FP8 (W8A8)            | ❌      | ❌      | ❌       | ✅︎    | ✅︎      | ✅︎         | ❌          | ❌         | ❌        | ✅︎           | ❌           |
+| AWQ                   | ❌      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ✅︎          | ❌         | ✅︎        | ❌          | ❌           |
+| GPTQ                  | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ✅︎          | ❌         | ✅︎        | ❌          | ❌           |
+| Marlin (GPTQ/AWQ/FP8) | ❌      | ❌       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌          | ❌         | ❌        | ❌          | ❌           |
+| INT8 (W8A8)           | ❌      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌          | ❌         | ✅︎        | ✅︎          | ✅︎           |
+| FP8 (W8A8)            | ❌      | ❌       | ❌       | ✅︎    | ✅︎       | ✅︎         | ❌          | ❌         | ❌        | ✅︎          | ❌           |
 | BitBLAS (GPTQ)        | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌          | ❌         | ❌        | ❌          | ❌           |
 | AQLM                  | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌          | ❌         | ❌        | ❌          | ❌           |
 | bitsandbytes          | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌          | ❌         | ❌        | ❌          | ❌           |
 | DeepSpeedFP           | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌          | ❌         | ❌        | ❌          | ❌           |
-| GGUF                  | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ✅︎         | ❌          | ❌         | ❌         | ❌          | ❌           |
-| INC (W8A8)            | ❌      | ❌      | ❌      | ❌    | ❌      | ❌        | ❌          | ✅︎         | ❌         | ❌           | ❌          |
+| GGUF                  | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ✅︎         | ❌          | ❌         | ❌        | ❌          | ❌           |
+| INC (W8A8)            | ❌      | ❌       | ❌       | ❌    | ❌       | ❌         | ❌          | ✅︎         | ❌        | ❌          | ❌           |
 
 - Volta refers to SM 7.0, Turing to SM 7.5, Ampere to SM 8.0/8.6, Ada to SM 8.9, and Hopper to SM 9.0.
 - ✅︎ indicates that the quantization method is supported on the specified hardware.

From 48b763d6b5c969024a8a5ae30c2bf9a91e8ac032 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Mon, 28 Jul 2025 21:47:21 -0400
Subject: [PATCH 002/224] [Refactor] Merge Compressed Tensor FP8
 `CompressedTensorsW8A8Fp8MoEMethod` and
 `CompressedTensorsW8A8Fp8MoECutlassMethod` (#21775)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 .../compressed_tensors_moe.py                 | 389 +++++-------------
 1 file changed, 100 insertions(+), 289 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index 8f69636dda7bf..17b41e8a1c23c 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -45,7 +45,6 @@ class GPTQMarlinState(Enum):
 
 __all__ = [
     "CompressedTensorsMoEMethod", "CompressedTensorsW8A8Fp8MoEMethod",
-    "CompressedTensorsW8A8Fp8MoECutlassMethod",
     "CompressedTensorsW8A8Int8MoEMethod",
     "CompressedTensorsWNA16MarlinMoEMethod", "CompressedTensorsWNA16MoEMethod",
     "CompressedTensorsW4A4MoeMethod"
@@ -84,9 +83,8 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
         elif quant_config._is_fp4a4_nvfp4(weight_quant, input_quant):
             return CompressedTensorsW4A4MoeMethod()
         elif (quant_config._is_fp8_w8a8_sm90(weight_quant, input_quant)
-              or quant_config._is_fp8_w8a8_sm100(weight_quant, input_quant)):
-            return CompressedTensorsW8A8Fp8MoECutlassMethod(quant_config)
-        elif quant_config._is_fp8_w8a8(weight_quant, input_quant):
+              or quant_config._is_fp8_w8a8_sm100(weight_quant, input_quant)
+              or quant_config._is_fp8_w8a8(weight_quant, input_quant)):
             return CompressedTensorsW8A8Fp8MoEMethod(quant_config)
         elif quant_config._is_dynamic_token_w8a8(weight_quant, input_quant):
             return CompressedTensorsW8A8Int8MoEMethod(quant_config)
@@ -378,6 +376,14 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
 
         self.rocm_aiter_moe_enabled = is_rocm_aiter_moe_enabled()
 
+        # cutlass path
+        self.is_fp8_w8a8_sm100 = quant_config._is_fp8_w8a8_sm100(
+            self.weight_quant, self.input_quant)
+        self.use_cutlass = (quant_config._is_fp8_w8a8_sm90(
+            self.weight_quant, self.input_quant) or self.is_fp8_w8a8_sm100)
+        self.fused_experts = None  # type: ignore[assignment]
+        self.disable_expert_map = False
+
     def create_weights(self, layer: torch.nn.Module, num_experts: int,
                        hidden_size: int, intermediate_size_per_partition: int,
                        params_dtype: torch.dtype, **extra_weight_attrs):
@@ -558,6 +564,34 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
         prepare_finalize: FusedMoEPrepareAndFinalize,
         moe: FusedMoEConfig,
     ) -> FusedMoEPermuteExpertsUnpermute:
+        # cutlass path
+        if self.use_cutlass:
+            from vllm.model_executor.layers.fused_moe import CutlassExpertsFp8
+
+            use_batched_format = (prepare_finalize.activation_format ==
+                                  FusedMoEActivationFormat.BatchedExperts)
+
+            num_dispatchers = prepare_finalize.num_dispatchers()
+            num_experts = (moe.num_local_experts
+                           if use_batched_format else moe.num_experts)
+
+            logger.debug("CutlassExpertsFp8(%s)", self.__class__.__name__)
+
+            experts = CutlassExpertsFp8(
+                num_experts,
+                moe.in_dtype,
+                self.input_quant.strategy == QuantizationStrategy.TOKEN,
+                self.weight_quant.strategy == QuantizationStrategy.CHANNEL,
+                num_dispatchers=num_dispatchers,
+                use_batched_format=use_batched_format,
+            )
+
+            self.disable_expert_map = (num_dispatchers > 1
+                                       or not experts.supports_expert_map())
+
+            return experts
+
+        # triton path
         from vllm.model_executor.layers.fused_moe import TritonExperts
         from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
             BatchedTritonExperts)
@@ -629,6 +663,68 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
             indices_type=self.topk_indices_dtype,
         )
 
+        # cutlass path
+        if self.use_cutlass:
+            per_act_token = (
+                self.input_quant.strategy == QuantizationStrategy.TOKEN)
+            per_channel_quant = (
+                self.weight_quant.strategy == QuantizationStrategy.CHANNEL)
+
+            # Triton fused_experts is faster for small batches on SM100.
+            if self.is_fp8_w8a8_sm100 and topk_ids.shape[0] <= 8:
+                from vllm.model_executor.layers.fused_moe import fused_experts
+                return fused_experts(
+                    hidden_states=x,
+                    w1=layer.w13_weight,
+                    w2=layer.w2_weight,
+                    topk_weights=topk_weights,
+                    topk_ids=topk_ids,
+                    inplace=True,
+                    activation=activation,
+                    apply_router_weight_on_input=apply_router_weight_on_input,
+                    use_fp8_w8a8=True,
+                    per_channel_quant=per_channel_quant,
+                    global_num_experts=global_num_experts,
+                    expert_map=None if self.disable_expert_map else expert_map,
+                    w1_scale=layer.w13_weight_scale,
+                    w2_scale=layer.w2_weight_scale,
+                    a1_scale=layer.w13_input_scale,
+                    a2_scale=layer.w2_input_scale)
+
+            if self.fused_experts is None:
+                from vllm.model_executor.layers.fused_moe.cutlass_moe import (
+                    cutlass_moe_fp8)
+                return cutlass_moe_fp8(
+                    x,
+                    layer.w13_weight,
+                    layer.w2_weight,
+                    topk_weights,
+                    topk_ids,
+                    per_act_token=per_act_token,
+                    activation=activation,
+                    global_num_experts=global_num_experts,
+                    expert_map=None if self.disable_expert_map else expert_map,
+                    w1_scale=layer.w13_weight_scale,
+                    w2_scale=layer.w2_weight_scale,
+                    a1_scale=layer.w13_input_scale,
+                    a2_scale=layer.w2_input_scale,
+                )
+            else:
+                return self.fused_experts(
+                    x,
+                    layer.w13_weight,
+                    layer.w2_weight,
+                    topk_weights,
+                    topk_ids,
+                    activation=activation,
+                    global_num_experts=global_num_experts,
+                    expert_map=None if self.disable_expert_map else expert_map,
+                    w1_scale=layer.w13_weight_scale,
+                    w2_scale=layer.w2_weight_scale,
+                    a1_scale=layer.w13_input_scale,
+                    a2_scale=layer.w2_input_scale,
+                )
+
         if self.rocm_aiter_moe_enabled:
             return self.rocm_aiter_fused_experts_func(
                 hidden_states=x,
@@ -685,291 +781,6 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
             a2_scale=layer.w2_input_scale)
 
 
-class CompressedTensorsW8A8Fp8MoECutlassMethod(CompressedTensorsMoEMethod):
-
-    def __init__(
-            self,
-            quant_config: "CompressedTensorsConfig"  # type: ignore # noqa E501
-    ):
-        self.quant_config = quant_config
-        self.weight_quant = self.quant_config.target_scheme_map["Linear"].get(
-            "weights")
-        self.input_quant = self.quant_config.target_scheme_map["Linear"].get(
-            "input_activations")
-
-        per_tensor = (self.weight_quant.strategy == QuantizationStrategy.TENSOR
-                      and self.input_quant.strategy
-                      == QuantizationStrategy.TENSOR)
-        per_channel = (
-            self.weight_quant.strategy == QuantizationStrategy.CHANNEL
-            and self.input_quant.strategy == QuantizationStrategy.TOKEN)
-        if not (per_tensor or per_channel):
-            raise ValueError(
-                "For FP8 Fused MoE layers, we require per tensor "
-                "or channelwise, dynamic per token quantization. Found "
-                f"{self.weight_quant}, {self.input_quant}")
-
-        self.static_input_scales = not self.input_quant.dynamic
-        if self.static_input_scales and per_channel:
-            raise ValueError(
-                "For FP8 Fused MoE layer, we require either per tensor or "
-                "channelwise, dynamic per token quantization.")
-
-        self.topk_indices_dtype = None
-        self.fused_experts = None  # type: ignore
-        self.disable_expert_map = False
-        self.is_fp8_w8a8_sm100 = self.quant_config._is_fp8_w8a8_sm100(
-            self.weight_quant, self.input_quant)
-
-    def create_weights(self, layer: torch.nn.Module, num_experts: int,
-                       hidden_size: int, intermediate_size_per_partition: int,
-                       params_dtype: torch.dtype, **extra_weight_attrs):
-
-        params_dtype = torch.float8_e4m3fn
-
-        # WEIGHTS
-        w13_weight = torch.nn.Parameter(torch.empty(
-            num_experts,
-            2 * intermediate_size_per_partition,
-            hidden_size,
-            dtype=params_dtype),
-                                        requires_grad=False)
-        layer.register_parameter("w13_weight", w13_weight)
-        set_weight_attrs(w13_weight, extra_weight_attrs)
-
-        w2_weight = torch.nn.Parameter(torch.empty(
-            num_experts,
-            hidden_size,
-            intermediate_size_per_partition,
-            dtype=params_dtype),
-                                       requires_grad=False)
-        layer.register_parameter("w2_weight", w2_weight)
-        set_weight_attrs(w2_weight, extra_weight_attrs)
-
-        # WEIGHT_SCALES
-        if self.weight_quant.strategy == QuantizationStrategy.TENSOR:
-            # Allocate 2 scales for w1 and w3 respectively.
-            # They are combined to a single scale after weight loading.
-            w13_weight_scale = torch.nn.Parameter(torch.ones(
-                num_experts, 2, dtype=torch.float32),
-                                                  requires_grad=False)
-            layer.register_parameter("w13_weight_scale", w13_weight_scale)
-            w2_weight_scale = torch.nn.Parameter(torch.ones(
-                num_experts, dtype=torch.float32),
-                                                 requires_grad=False)
-            layer.register_parameter("w2_weight_scale", w2_weight_scale)
-            # Add PER-TENSOR quantization for FusedMoE.weight_loader.
-            extra_weight_attrs.update(
-                {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value})
-            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-        elif self.weight_quant.strategy == QuantizationStrategy.CHANNEL:
-            w13_weight_scale = torch.nn.Parameter(torch.ones(
-                num_experts,
-                2 * intermediate_size_per_partition,
-                1,
-                dtype=torch.float32),
-                                                  requires_grad=False)
-            layer.register_parameter("w13_weight_scale", w13_weight_scale)
-            w2_weight_scale = torch.nn.Parameter(torch.ones(
-                num_experts, hidden_size, 1, dtype=torch.float32),
-                                                 requires_grad=False)
-            layer.register_parameter("w2_weight_scale", w2_weight_scale)
-            # Add PER-CHANNEL quantization for FusedMoE.weight_loader.
-            extra_weight_attrs.update(
-                {"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value})
-            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-        # INPUT_SCALES
-        if self.static_input_scales:
-            w13_input_scale = torch.nn.Parameter(torch.ones(
-                num_experts, dtype=torch.float32),
-                                                 requires_grad=False)
-            layer.register_parameter("w13_input_scale", w13_input_scale)
-            set_weight_attrs(w13_input_scale, extra_weight_attrs)
-
-            w2_input_scale = torch.nn.Parameter(torch.ones(
-                num_experts, dtype=torch.float32),
-                                                requires_grad=False)
-            layer.register_parameter("w2_input_scale", w2_input_scale)
-            set_weight_attrs(w2_input_scale, extra_weight_attrs)
-        else:
-            layer.w13_input_scale = None
-            layer.w2_input_scale = None
-
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        # Fp8 moe kernels require a single activation scale.
-        # We take the max of all the scales in case they differ.
-        if self.static_input_scales:
-            assert self.input_quant.strategy == QuantizationStrategy.TENSOR
-            if (layer.w13_input_scale is None or layer.w2_input_scale is None):
-                raise ValueError(
-                    "QuantConfig has static quantization, but found "
-                    "activation scales are None.")
-            if (not all_close_1d(layer.w13_input_scale)
-                    or not all_close_1d(layer.w2_input_scale)):
-                logger.warning_once(
-                    "Found input_scales that are not equal for "
-                    "fp8 MoE layer. Using the maximum across experts "
-                    "for each layer.")
-            layer.w13_input_scale = torch.nn.Parameter(
-                layer.w13_input_scale.max(), requires_grad=False)
-            layer.w2_input_scale = torch.nn.Parameter(
-                layer.w2_input_scale.max(), requires_grad=False)
-
-        # For Per-TENSOR case, Fp8 moe kernel needs single weight scale
-        # for w13 per expert. Use max then dequant and requant each expert.
-        if self.weight_quant.strategy == QuantizationStrategy.TENSOR:
-            assert layer.w13_weight_scale is not None
-            shard_size = layer.intermediate_size_per_partition
-            max_w13_scales = layer.w13_weight_scale.max(dim=1).values
-            for expert_id in range(layer.local_num_experts):
-                start = 0
-                for shard_id in range(2):
-                    dq_weight = per_tensor_dequantize(
-                        layer.w13_weight[expert_id][start:start +
-                                                    shard_size, :],
-                        layer.w13_weight_scale[expert_id][shard_id])
-                    layer.w13_weight[expert_id][
-                        start:start + shard_size, :], _ = ops.scaled_fp8_quant(
-                            dq_weight, max_w13_scales[expert_id])
-                    start += shard_size
-            layer.w13_weight_scale = torch.nn.Parameter(max_w13_scales,
-                                                        requires_grad=False)
-
-    def select_gemm_impl(
-        self,
-        prepare_finalize: FusedMoEPrepareAndFinalize,
-        moe: FusedMoEConfig,
-    ) -> FusedMoEPermuteExpertsUnpermute:
-        from vllm.model_executor.layers.fused_moe import CutlassExpertsFp8
-
-        use_batched_format = (prepare_finalize.activation_format ==
-                              FusedMoEActivationFormat.BatchedExperts)
-
-        num_dispatchers = prepare_finalize.num_dispatchers()
-
-        num_experts = (moe.num_local_experts
-                       if use_batched_format else moe.num_experts)
-
-        logger.debug("CutlassExpertsFp8(%s)", self.__class__.__name__)
-
-        experts = CutlassExpertsFp8(
-            num_experts,
-            moe.in_dtype,
-            self.input_quant.strategy == QuantizationStrategy.TOKEN,
-            self.weight_quant.strategy == QuantizationStrategy.CHANNEL,
-            num_dispatchers=num_dispatchers,
-            use_batched_format=use_batched_format,
-        )
-
-        self.disable_expert_map = (num_dispatchers > 1
-                                   or not experts.supports_expert_map())
-
-        return experts
-
-    def apply(
-        self,
-        layer: torch.nn.Module,
-        x: torch.Tensor,
-        router_logits: torch.Tensor,
-        top_k: int,
-        renormalize: bool,
-        use_grouped_topk: bool = False,
-        topk_group: Optional[int] = None,
-        num_expert_group: Optional[int] = None,
-        global_num_experts: int = -1,
-        expert_map: Optional[torch.Tensor] = None,
-        custom_routing_function: Optional[Callable] = None,
-        scoring_func: str = "softmax",
-        e_score_correction_bias: Optional[torch.Tensor] = None,
-        apply_router_weight_on_input: bool = False,
-        activation: str = "silu",
-        enable_eplb: bool = False,
-        expert_load_view: Optional[torch.Tensor] = None,
-        logical_to_physical_map: Optional[torch.Tensor] = None,
-        logical_replica_count: Optional[torch.Tensor] = None,
-    ) -> torch.Tensor:
-        if enable_eplb:
-            raise NotImplementedError(
-                "EPLB not supported for "
-                "`CompressedTensorsW8A8Fp8MoECutlassMethod` yet.")
-
-        topk_weights, topk_ids = FusedMoE.select_experts(
-            hidden_states=x,
-            router_logits=router_logits,
-            use_grouped_topk=use_grouped_topk,
-            top_k=top_k,
-            renormalize=renormalize,
-            topk_group=topk_group,
-            num_expert_group=num_expert_group,
-            custom_routing_function=custom_routing_function,
-            scoring_func=scoring_func,
-            e_score_correction_bias=e_score_correction_bias)
-
-        per_act_token = (
-            self.input_quant.strategy == QuantizationStrategy.TOKEN)
-        per_channel_quant = (
-            self.weight_quant.strategy == QuantizationStrategy.CHANNEL)
-        # Triton fused_experts is faster in small batch sizes on SM100.
-        # Fall back to fused_experts in small batch sizes.
-        if self.is_fp8_w8a8_sm100 and topk_ids.shape[0] <= 8:
-            from vllm.model_executor.layers.fused_moe import fused_experts
-            return fused_experts(
-                x,
-                layer.w13_weight,
-                layer.w2_weight,
-                topk_weights,
-                topk_ids,
-                inplace=True,
-                activation=activation,
-                apply_router_weight_on_input=apply_router_weight_on_input,
-                use_fp8_w8a8=True,
-                per_channel_quant=per_channel_quant,
-                global_num_experts=global_num_experts,
-                expert_map=None if self.disable_expert_map else expert_map,
-                w1_scale=layer.w13_weight_scale,
-                w2_scale=layer.w2_weight_scale,
-                a1_scale=layer.w13_input_scale,
-                a2_scale=layer.w2_input_scale)
-        if self.fused_experts is None:
-            # If no modular kernel is provided, use cutlass_moe_fp8
-            from vllm.model_executor.layers.fused_moe.cutlass_moe import (
-                cutlass_moe_fp8)
-            return cutlass_moe_fp8(
-                x,
-                layer.w13_weight,
-                layer.w2_weight,
-                topk_weights,
-                topk_ids,
-                per_act_token=per_act_token,
-                activation=activation,
-                global_num_experts=global_num_experts,
-                expert_map=None if self.disable_expert_map else expert_map,
-                w1_scale=layer.w13_weight_scale,
-                w2_scale=layer.w2_weight_scale,
-                a1_scale=layer.w13_input_scale,
-                a2_scale=layer.w2_input_scale,
-            )
-        else:
-            return self.fused_experts(
-                x,
-                layer.w13_weight,
-                layer.w2_weight,
-                topk_weights,
-                topk_ids,
-                activation=activation,
-                global_num_experts=global_num_experts,
-                expert_map=None if self.disable_expert_map else expert_map,
-                w1_scale=layer.w13_weight_scale,
-                w2_scale=layer.w2_weight_scale,
-                a1_scale=layer.w13_input_scale,
-                a2_scale=layer.w2_input_scale,
-            )
-
-
 class CompressedTensorsW8A8Int8MoEMethod(CompressedTensorsMoEMethod):
 
     def __init__(

From afa26075966301887a15f958a6aec0a89a3faacd Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Mon, 28 Jul 2025 21:56:24 -0400
Subject: [PATCH 003/224] [CI] Parallelize Kernels MoE Test (#21764)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 .buildkite/test-pipeline.yaml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 948ce9e8667f5..ac145453dabde 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -403,17 +403,18 @@ steps:
   - vllm/model_executor/layers/quantization
   - tests/kernels/quantization
   commands:
-    - pytest -v -s kernels/quantization  --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+    - pytest -v -s kernels/quantization --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 2
 
-- label: Kernels MoE Test
+- label: Kernels MoE Test %N
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - csrc/moe/
   - tests/kernels/moe
   - vllm/model_executor/layers/fused_moe/
   commands:
-    - pytest -v -s kernels/moe
+    - pytest -v -s kernels/moe --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+  parallelism: 2
 
 - label: Kernels Mamba Test
   mirror_hardwares: [amdexperimental, amdproduction]

From e18f0851033fbc4ef55c1989411f2a5666b518c6 Mon Sep 17 00:00:00 2001
From: Calvin Chen <wen.chen@dynamia.ai>
Date: Tue, 29 Jul 2025 09:59:44 +0800
Subject: [PATCH 004/224] skip fusedmoe layer for start_load_kv (#21378)

Signed-off-by: calvin chen <wen.chen@dynamia.ai>
---
 .../kv_connector/v1/p2p/p2p_nccl_connector.py        | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
index d47a75461d72e..32d0e43d71afe 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
@@ -192,8 +192,16 @@ class P2pNcclConnector(KVConnectorBase_V1):
         # Load the KV for each request each layer
         for request in metadata.requests:
             for layer_name in forward_context.no_compile_layers:
-                attn_layer = forward_context.no_compile_layers[layer_name]
-                kv_cache_layer = attn_layer.kv_cache[ \
+                layer = forward_context.no_compile_layers[layer_name]
+
+                # Only process layers that have a kv_cache attribute
+                # (attention layers); skip non-attention layers such as
+                # FusedMoE.
+                kv_cache = getattr(layer, 'kv_cache', None)
+                if kv_cache is None:
+                    continue
+
+                kv_cache_layer = kv_cache[ \
                     forward_context.virtual_engine]
 
                 kv_cache = self.p2p_nccl_engine.recv_tensor(

From 12a223ef9bfebcc61e477047dce049495fe8c8a8 Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Date: Mon, 28 Jul 2025 23:35:37 -0400
Subject: [PATCH 005/224] [AMD][CI/Build][Bugfix] Guarding CUDA specific
 functions by ifndef ROCM (#21766)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
---
 .../quantization/compressed_tensors/int8_quant_kernels.cu | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/csrc/quantization/compressed_tensors/int8_quant_kernels.cu b/csrc/quantization/compressed_tensors/int8_quant_kernels.cu
index 6a81f159f46ae..d8369108d0bd3 100644
--- a/csrc/quantization/compressed_tensors/int8_quant_kernels.cu
+++ b/csrc/quantization/compressed_tensors/int8_quant_kernels.cu
@@ -1,7 +1,9 @@
 #include <ATen/cuda/CUDAContext.h>
 #include <torch/all.h>
 
-#include "../per_token_group_quant_8bit.h"
+#ifndef USE_ROCM
+  #include "../per_token_group_quant_8bit.h"
+#endif
 
 #include <cmath>
 
@@ -339,10 +341,12 @@ void dynamic_scaled_int8_quant(
       });
 }
 
+#ifndef USE_ROCM
 void per_token_group_quant_int8(const torch::Tensor& input,
                                 torch::Tensor& output_q,
                                 torch::Tensor& output_s, int64_t group_size,
                                 double eps, double int8_min, double int8_max) {
   per_token_group_quant_8bit(input, output_q, output_s, group_size, eps,
                              int8_min, int8_max);
-}
\ No newline at end of file
+}
+#endif

From f1e2c095ecee01db02d0b63aae26d039b940d894 Mon Sep 17 00:00:00 2001
From: Benji Beck <benjibeck@meta.com>
Date: Mon, 28 Jul 2025 22:09:45 -0700
Subject: [PATCH 006/224] Migrate InternVLImageInputs and InternVLVideoInputs
 to TensorSchema (#21684)

Signed-off-by: Benji Beck <benjibeck@meta.com>
---
 vllm/model_executor/models/internvl.py | 115 +++++++++++--------------
 1 file changed, 51 insertions(+), 64 deletions(-)

diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py
index 3637f037751c0..a0e98ca3f8155 100644
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -9,7 +9,7 @@
 # --------------------------------------------------------
 from abc import ABC, abstractmethod
 from collections.abc import Iterable, Mapping, Sequence
-from typing import Any, Literal, Optional, TypedDict, TypeVar, Union
+from typing import Annotated, Any, Literal, Optional, TypeVar, Union
 
 import numpy.typing as npt
 import torch
@@ -37,6 +37,7 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.tokenizer import AnyTokenizer
+from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .interfaces import (MultiModalEmbeddings, SupportsLoRA,
                          SupportsMultiModal, SupportsPP)
@@ -51,54 +52,60 @@ IMAGENET_MEAN = (0.485, 0.456, 0.406)
 IMAGENET_STD = (0.229, 0.224, 0.225)
 
 
-class InternVLImagePixelInputs(TypedDict):
+class InternVLImagePixelInputs(TensorSchema):
+    """
+    Dimensions:
+        - bn: Batch size * number of images
+        - bnp: Batch size * number of images * (1 + num_patches)
+        - c: Number of channels (3)
+        - h: Height of each image patch
+        - w: Width of each image patch
+    """
     type: Literal["pixel_values"]
-    pixel_values_flat: torch.Tensor
+    pixel_values_flat: Annotated[torch.Tensor, TensorShape("bnp", 3, "h", "w")]
+    num_patches: Annotated[torch.Tensor, TensorShape("bn")]
+
+
+class InternVLImageEmbeddingInputs(TensorSchema):
     """
-    Shape:
-    `(batch_size * num_images * (1 + num_patches), num_channels, height, width)`
+    Dimensions:
+        - n: Number of images
+        - f: Total image feature size
+        - h: Hidden size (must match the hidden size of language model backbone)
     """
-
-    num_patches: torch.Tensor
-    """Shape: `(batch_size * num_images)`"""
-
-
-class InternVLImageEmbeddingInputs(TypedDict):
     type: Literal["image_embeds"]
-    data: Union[torch.Tensor, list[torch.Tensor]]
-    """ 
-    A tensor of shape `(num_images, total_image_feature_size, hidden_size)`
-    or a list of tensors of shape `(total_image_feature_size, hidden_size)`
-
-    `hidden_size` must match the hidden size of language model backbone.
-    """
+    data: Annotated[Union[torch.Tensor, list[torch.Tensor]],
+                    TensorShape("n", "f", "h")]
 
 
 InternVLImageInputs = Union[InternVLImagePixelInputs,
                             InternVLImageEmbeddingInputs]
 
 
-class InternVLVideoPixelInputs(TypedDict):
+class InternVLVideoPixelInputs(TensorSchema):
+    """
+    Dimensions:
+        - bvf: Batch size * number of videos * num_frames
+        - bn: Batch size * number of videos
+        - c: Number of channels (3)
+        - h: Height of each video frame
+        - w: Width of each video frame
+    """
     type: Literal["pixel_values_videos"]
-    pixel_values_flat: torch.Tensor
+    pixel_values_flat: Annotated[torch.Tensor, TensorShape("bvf", 3, "h", "w")]
+    num_patches: Annotated[torch.Tensor, TensorShape("bn")]
+
+
+class InternVLVideoEmbeddingInputs(TensorSchema):
     """
-    Shape:
-    `(batch_size * num_video * num_frames, num_channels, height, width)`
+    Dimensions:
+        - n: Number of videos
+        - f: Total video feature size
+        - h: Hidden size (must match the hidden size of language model backbone)
     """
-
-    num_patches: torch.Tensor
-    """Shape: `(batch_size * num_images)`"""
-
-
-class InternVLVideoEmbeddingInputs(TypedDict):
     type: Literal["video_embeds"]
-    data: Union[torch.Tensor, list[torch.Tensor]]
-    """ 
-    A tensor of shape `(num_videos, total_video_feature_size, hidden_size)`
-    or a list of tensors of shape `(total_video_feature_size, hidden_size)`
-
-    `hidden_size` must match the hidden size of language model backbone.
-    """
+    data: Annotated[Union[torch.Tensor, list[torch.Tensor]],
+                    TensorShape("n", "f", "h")]
 
 
 InternVLVideoInputs = Union[InternVLVideoPixelInputs,
@@ -1151,26 +1158,6 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
         vit_embeds = self.mlp1(vit_embeds)
         return vit_embeds
 
-    def _validate_pixel_values(self, data: torch.Tensor) -> torch.Tensor:
-
-        h = w = self.config.vision_config.image_size
-        expected_dims = (3, h, w)
-
-        def _validate_shape(d: torch.Tensor):
-            actual_dims = tuple(d.shape)
-
-            if actual_dims != expected_dims:
-                expected_expr = str(expected_dims)
-                raise ValueError(
-                    "The expected shape of pixel values per image per batch "
-                    f" per patch is {expected_expr}. "
-                    f"You supplied {tuple(d.shape)}.")
-
-        for d in data:
-            _validate_shape(d)
-
-        return data
-
     def _parse_and_validate_image_input(
             self, **kwargs: object) -> Optional[InternVLImageInputs]:
         pixel_values_flat = kwargs.pop("pixel_values_flat", None)
@@ -1205,12 +1192,14 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
 
             pixel_values_flat = flatten_bn(pixel_values_flat, concat=True)
             image_num_patches = flatten_bn(image_num_patches, concat=True)
+            expected_h = expected_w = self.config.vision_config.image_size
+            resolve_bindings = {"h": expected_h, "w": expected_w}
 
             return InternVLImagePixelInputs(
                 type="pixel_values",
-                pixel_values_flat=self._validate_pixel_values(
-                    pixel_values_flat),
+                pixel_values_flat=pixel_values_flat,
                 num_patches=image_num_patches,
+                resolve_bindings=resolve_bindings,
             )
 
         raise AssertionError("This line should be unreachable.")
@@ -1225,11 +1214,7 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
             return None
 
         if video_embeds is not None:
-            if not isinstance(video_embeds, (torch.Tensor, list)):
-                raise ValueError("Incorrect type of video embeddings. "
-                                 f"Got type: {type(video_embeds)}")
-
-            return InternVLImageEmbeddingInputs(
+            return InternVLVideoEmbeddingInputs(
                 type="video_embeds",
                 data=flatten_bn(video_embeds),
             )
@@ -1250,12 +1235,14 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
             pixel_values_flat_video = flatten_bn(pixel_values_flat_video,
                                                  concat=True)
             video_num_patches = flatten_bn(video_num_patches, concat=True)
+            expected_h = expected_w = self.config.vision_config.image_size
+            resolve_bindings = {"h": expected_h, "w": expected_w}
 
             return InternVLVideoPixelInputs(
                 type="pixel_values_videos",
-                pixel_values_flat=self._validate_pixel_values(
-                    pixel_values_flat_video),
+                pixel_values_flat=pixel_values_flat_video,
                 num_patches=video_num_patches,
+                resolve_bindings=resolve_bindings,
             )
 
         raise AssertionError("This line should be unreachable.")

From 7234fe26858f2c621901494c307c90e65fe35340 Mon Sep 17 00:00:00 2001
From: Nick Hill <nhill@redhat.com>
Date: Tue, 29 Jul 2025 06:14:47 +0100
Subject: [PATCH 007/224] [Misc] Rework process titles (#21780)

Signed-off-by: Nick Hill <nhill@redhat.com>
---
 vllm/entrypoints/cli/serve.py          |  6 ++++--
 vllm/entrypoints/openai/api_server.py  | 16 ++++++++++++----
 vllm/utils/__init__.py                 | 16 ++++++++++++----
 vllm/v1/engine/coordinator.py          |  7 +++----
 vllm/v1/engine/core.py                 |  7 ++++---
 vllm/v1/executor/multiproc_executor.py | 16 ++++++++++------
 vllm/v1/utils.py                       |  6 +++---
 7 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py
index 68eb2580991c8..a69363e3d98fe 100644
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -21,7 +21,7 @@ from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG,
 from vllm.executor.multiproc_worker_utils import _add_prefix
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import FlexibleArgumentParser, bind_process_name, get_tcp_uri
+from vllm.utils import FlexibleArgumentParser, get_tcp_uri
 from vllm.v1.engine.core import EngineCoreProc
 from vllm.v1.engine.utils import CoreEngineProcManager, launch_core_engines
 from vllm.v1.executor.abstract import Executor
@@ -77,7 +77,7 @@ def run_headless(args: argparse.Namespace):
 
     if args.api_server_count > 1:
         raise ValueError("api_server_count can't be set in headless mode")
-    bind_process_name("APIServer_Headless")
+    # set_process_title("Headless_ProcManager")
     # Create the EngineConfig.
     engine_args = vllm.AsyncEngineArgs.from_cli_args(args)
     usage_context = UsageContext.OPENAI_API_SERVER
@@ -140,6 +140,8 @@ def run_multi_api_server(args: argparse.Namespace):
     num_api_servers = args.api_server_count
     assert num_api_servers > 0
 
+    # set_process_title("ProcManager")
+
     if num_api_servers > 1:
         setup_multiprocess_prometheus()
 
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 3d4c4a6b752a7..c375c8755108c 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -11,6 +11,7 @@ import multiprocessing
 import os
 import signal
 import socket
+import sys
 import tempfile
 import uuid
 from argparse import Namespace
@@ -94,15 +95,15 @@ from vllm.entrypoints.openai.serving_transcription import (
 from vllm.entrypoints.openai.tool_parsers import ToolParserManager
 from vllm.entrypoints.utils import (cli_env_setup, load_aware_call,
                                     log_non_default_args, with_cancellation)
+from vllm.executor.multiproc_worker_utils import _add_prefix
 from vllm.logger import init_logger
 from vllm.reasoning import ReasoningParserManager
 from vllm.transformers_utils.config import (
     maybe_register_config_serialize_by_value)
 from vllm.transformers_utils.tokenizer import MistralTokenizer
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import (Device, FlexibleArgumentParser, bind_process_name,
-                        get_open_zmq_ipc_path, is_valid_ipv6_address,
-                        set_ulimit)
+from vllm.utils import (Device, FlexibleArgumentParser, get_open_zmq_ipc_path,
+                        is_valid_ipv6_address, set_process_title, set_ulimit)
 from vllm.v1.metrics.prometheus import get_prometheus_registry
 from vllm.version import __version__ as VLLM_VERSION
 
@@ -1805,6 +1806,13 @@ def setup_server(args):
 
 async def run_server(args, **uvicorn_kwargs) -> None:
     """Run a single-worker API server."""
+
+    # Add process-specific prefix to stdout and stderr.
+    process_name = "APIServer"
+    pid = os.getpid()
+    _add_prefix(sys.stdout, process_name, pid)
+    _add_prefix(sys.stderr, process_name, pid)
+
     listen_address, sock = setup_server(args)
     await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
 
@@ -1820,7 +1828,7 @@ async def run_server_worker(listen_address,
         ToolParserManager.import_tool_parser(args.tool_parser_plugin)
 
     server_index = client_config.get("client_index", 0) if client_config else 0
-    bind_process_name("APIServer", str(server_index))
+    set_process_title("APIServer", str(server_index))
     # Load logging config for uvicorn if specified
     log_config = load_log_config(args.log_config_file)
     if log_config is not None:
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 054037b8932b7..ae978c855a8e5 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -3282,14 +3282,22 @@ def has_deep_gemm() -> bool:
     return _has_module("deep_gemm")
 
 
-def bind_process_name(name: str, suffix: str = "") -> None:
-    """Bind the process name to a specific name with an optional suffix.
+def set_process_title(name: str,
+                      suffix: str = "",
+                      append: bool = False) -> None:
+    """
+    Set the current process title to a specific name with an
+    optional suffix.
 
     Args:
-        name: The base name to bind the process to.
+        name: The title to assign to the current process.
         suffix: An optional suffix to append to the base name.
+        append: Whether to append to the existing process title.
     """
-    name = f"{envs.VLLM_PROCESS_NAME_PREFIX}::{name}"
     if suffix:
         name = f"{name}_{suffix}"
+    if append:
+        name = f"{setproctitle.getproctitle()}_{name}"
+    else:
+        name = f"{envs.VLLM_PROCESS_NAME_PREFIX}::{name}"
     setproctitle.setproctitle(name)
diff --git a/vllm/v1/engine/coordinator.py b/vllm/v1/engine/coordinator.py
index fc45eea3a73cf..440628576bcb7 100644
--- a/vllm/v1/engine/coordinator.py
+++ b/vllm/v1/engine/coordinator.py
@@ -10,11 +10,10 @@ import zmq
 
 from vllm.config import ParallelConfig
 from vllm.logger import init_logger
-from vllm.utils import get_mp_context, make_zmq_socket
+from vllm.utils import get_mp_context, make_zmq_socket, set_process_title
 from vllm.v1.engine import EngineCoreOutputs, EngineCoreRequestType
 from vllm.v1.serial_utils import MsgpackDecoder
-from vllm.v1.utils import (bind_process_name, get_engine_client_zmq_addr,
-                           shutdown)
+from vllm.v1.utils import get_engine_client_zmq_addr, shutdown
 
 logger = init_logger(__name__)
 
@@ -119,7 +118,7 @@ class DPCoordinatorProc:
     def __init__(self,
                  engine_count: int,
                  min_stats_update_interval_ms: int = 100):
-        bind_process_name(self.__class__.__name__)
+        set_process_title("DPCoordinator")
         self.ctx = zmq.Context()
 
         self.engines = [EngineState() for _ in range(engine_count)]
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 57f60c4b289bb..cad93061e65b0 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -26,8 +26,8 @@ from vllm.lora.request import LoRARequest
 from vllm.tasks import POOLING_TASKS, SupportedTask
 from vllm.transformers_utils.config import (
     maybe_register_config_serialize_by_value)
-from vllm.utils import (bind_process_name, make_zmq_socket,
-                        resolve_obj_by_qualname)
+from vllm.utils import (make_zmq_socket, resolve_obj_by_qualname,
+                        set_process_title)
 from vllm.v1.core.kv_cache_utils import (get_kv_cache_config,
                                          unify_kv_cache_configs)
 from vllm.v1.core.sched.interface import SchedulerInterface
@@ -425,7 +425,6 @@ class EngineCoreProc(EngineCore):
         client_handshake_address: Optional[str] = None,
         engine_index: int = 0,
     ):
-        bind_process_name(self.__class__.__name__, f"{engine_index}")
         self.input_queue = queue.Queue[tuple[EngineCoreRequestType, Any]]()
         self.output_queue = queue.Queue[Union[tuple[int, EngineCoreOutputs],
                                               bytes]]()
@@ -630,11 +629,13 @@ class EngineCoreProc(EngineCore):
             parallel_config: ParallelConfig = kwargs[
                 "vllm_config"].parallel_config
             if parallel_config.data_parallel_size > 1 or dp_rank > 0:
+                set_process_title("DPEngineCore", str(dp_rank))
                 # Set data parallel rank for this engine process.
                 parallel_config.data_parallel_rank = dp_rank
                 parallel_config.data_parallel_rank_local = local_dp_rank
                 engine_core = DPEngineCoreProc(*args, **kwargs)
             else:
+                set_process_title("EngineCore")
                 engine_core = EngineCoreProc(*args, **kwargs)
 
             engine_core.run_busy_loop()
diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
index 897174c1599df..8270385053852 100644
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -30,8 +30,8 @@ from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
 from vllm.executor.multiproc_worker_utils import (
     _add_prefix, set_multiprocessing_worker_envs)
 from vllm.logger import init_logger
-from vllm.utils import (bind_process_name, get_distributed_init_method,
-                        get_loopback_ip, get_mp_context, get_open_port)
+from vllm.utils import (get_distributed_init_method, get_loopback_ip,
+                        get_mp_context, get_open_port, set_process_title)
 from vllm.v1.executor.abstract import Executor, FailureCallback
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.worker.worker_base import WorkerWrapperBase
@@ -376,10 +376,14 @@ class WorkerProc:
         }
         wrapper.init_worker(all_kwargs)
         self.worker = wrapper
-        bind_process_name(
-            self.worker.worker.__class__.__name__,
-            f"TP{self.rank}_DP{vllm_config.parallel_config.data_parallel_rank}"
-        )
+
+        pp_size = vllm_config.parallel_config.pipeline_parallel_size
+        tp_size = vllm_config.parallel_config.tensor_parallel_size
+        pp_str = f"PP{rank // tp_size}" if pp_size > 1 else ""
+        tp_str = f"TP{rank % tp_size}" if tp_size > 1 else ""
+        suffix = f"{pp_str}{'_' if pp_str and tp_str else ''}{tp_str}"
+        if suffix:
+            set_process_title(suffix, append=True)
         pid = os.getpid()
         _add_prefix(sys.stdout, f"VllmWorker rank={rank}", pid)
         _add_prefix(sys.stderr, f"VllmWorker rank={rank}", pid)
diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py
index bb5a36f38386b..c74d8c543f76c 100644
--- a/vllm/v1/utils.py
+++ b/vllm/v1/utils.py
@@ -15,8 +15,8 @@ import torch
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                   usage_message)
-from vllm.utils import (bind_process_name, get_open_port,
-                        get_open_zmq_ipc_path, get_tcp_uri, kill_process_tree)
+from vllm.utils import (get_open_port, get_open_zmq_ipc_path, get_tcp_uri,
+                        kill_process_tree)
 
 if TYPE_CHECKING:
     from vllm.v1.engine.coordinator import DPCoordinator
@@ -144,7 +144,7 @@ class APIServerProcessManager:
         self.listen_address = listen_address
         self.sock = sock
         self.args = args
-        bind_process_name(self.__class__.__name__)
+
         # Start API servers
         spawn_context = multiprocessing.get_context("spawn")
         self.processes: list[BaseProcess] = []

From a2480251ec92ba2a849464dde48db8a2b7f6ef81 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Tue, 29 Jul 2025 14:53:18 +0800
Subject: [PATCH 008/224] [Doc] Link to RFC for pooling optimizations (#21806)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 docs/models/pooling_models.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/models/pooling_models.md b/docs/models/pooling_models.md
index a06d86523af1a..f1200103171e9 100644
--- a/docs/models/pooling_models.md
+++ b/docs/models/pooling_models.md
@@ -7,9 +7,9 @@ These models use a [Pooler][vllm.model_executor.layers.pooler.Pooler] to extract
 before returning them.
 
 !!! note
-    We currently support pooling models primarily as a matter of convenience.
-    As shown in the [Compatibility Matrix](../features/compatibility_matrix.md), most vLLM features are not applicable to
-    pooling models as they only work on the generation or decode stage, so performance may not improve as much.
+    We currently support pooling models primarily as a matter of convenience. vLLM is not guaranteed to offer any performance improvement for them over using HF Transformers / Sentence Transformers directly.
+
+    We are now planning to optimize pooling models in vLLM. Please comment on <gh-issue:21796> if you have any suggestions!
 
 ## Configuration
 

From a4528f0cac5d2857ccc56d2a2e1a1c43142643ce Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Tue, 29 Jul 2025 18:13:27 +0800
Subject: [PATCH 009/224] [Model]: Fused MoE for nomic-embed-text-v2-moe
 (#18321)

Signed-off-by: isotr0py <2037008807@qq.com>
Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
---
 .../layers/fused_moe/fused_moe.py             |  47 +++-
 vllm/model_executor/models/bert_with_rope.py  | 204 +++++++++---------
 2 files changed, 140 insertions(+), 111 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 1985e8612da35..227aacf25c0b0 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -7,6 +7,7 @@ import os
 from typing import Any, Callable, Optional
 
 import torch
+import torch.nn.functional as F
 
 import vllm.envs as envs
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
@@ -1001,6 +1002,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor,
                           topk_weights: torch.Tensor,
                           topk_ids: torch.Tensor,
                           activation: str = "silu",
+                          is_act_and_mul: bool = True,
                           apply_router_weight_on_input: bool = False,
                           use_fp8_w8a8: bool = False,
                           use_int8_w8a8: bool = False,
@@ -1018,7 +1020,8 @@ def inplace_fused_experts(hidden_states: torch.Tensor,
                           a2_scale: Optional[torch.Tensor] = None,
                           block_shape: Optional[list[int]] = None) -> None:
     fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids, True,
-                       activation, apply_router_weight_on_input, use_fp8_w8a8,
+                       activation, is_act_and_mul,
+                       apply_router_weight_on_input, use_fp8_w8a8,
                        use_int8_w8a8, use_int8_w8a16, use_int4_w4a16,
                        use_mxfp4_w4a4, per_channel_quant, global_num_experts,
                        expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale,
@@ -1032,6 +1035,7 @@ def inplace_fused_experts_fake(
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
         activation: str = "silu",
+        is_act_and_mul: bool = True,
         apply_router_weight_on_input: bool = False,
         use_fp8_w8a8: bool = False,
         use_int8_w8a8: bool = False,
@@ -1167,6 +1171,7 @@ def outplace_fused_experts(
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
         activation: str = "silu",
+        is_act_and_mul: bool = True,
         apply_router_weight_on_input: bool = False,
         use_fp8_w8a8: bool = False,
         use_int8_w8a8: bool = False,
@@ -1183,13 +1188,12 @@ def outplace_fused_experts(
         a1_scale: Optional[torch.Tensor] = None,
         a2_scale: Optional[torch.Tensor] = None,
         block_shape: Optional[list[int]] = None) -> torch.Tensor:
-    return fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids,
-                              False, activation, apply_router_weight_on_input,
-                              use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16,
-                              use_int4_w4a16, use_mxfp4_w4a4,
-                              per_channel_quant, global_num_experts,
-                              expert_map, w1_scale, w2_scale, w1_zp, w2_zp,
-                              a1_scale, a2_scale, block_shape)
+    return fused_experts_impl(
+        hidden_states, w1, w2, topk_weights, topk_ids, False, activation,
+        is_act_and_mul, apply_router_weight_on_input, use_fp8_w8a8,
+        use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, use_mxfp4_w4a4,
+        per_channel_quant, global_num_experts, expert_map, w1_scale, w2_scale,
+        w1_zp, w2_zp, a1_scale, a2_scale, block_shape)
 
 
 def outplace_fused_experts_fake(
@@ -1199,6 +1203,7 @@ def outplace_fused_experts_fake(
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
         activation: str = "silu",
+        is_act_and_mul: bool = True,
         use_fp8_w8a8: bool = False,
         use_int8_w8a8: bool = False,
         use_int8_w8a16: bool = False,
@@ -1253,6 +1258,7 @@ def fused_experts(
         topk_ids: torch.Tensor,
         inplace: bool = False,
         activation: str = "silu",
+        is_act_and_mul: bool = True,
         apply_router_weight_on_input: bool = False,
         use_fp8_w8a8: bool = False,
         use_int8_w8a8: bool = False,
@@ -1283,6 +1289,8 @@ def fused_experts(
                             or is_blackwell_deep_gemm_used())
     if (allow_deep_gemm and use_fp8_w8a8 and should_use_deep_gemm):
         assert apply_router_weight_on_input is False
+        assert is_act_and_mul, (
+            "DeepGemm only supports is_act_and_mul=True for now.")
         return deep_gemm_moe_fp8(
             hidden_states=hidden_states,
             w1=w1,
@@ -1319,6 +1327,7 @@ def fused_experts(
             topk_weights=topk_weights,
             topk_ids=topk_ids,
             activation=activation,
+            is_act_and_mul=is_act_and_mul,
             apply_router_weight_on_input=apply_router_weight_on_input,
             use_fp8_w8a8=use_fp8_w8a8,
             use_int8_w8a8=use_int8_w8a8,
@@ -1345,6 +1354,7 @@ def fused_experts_impl(
     topk_ids: torch.Tensor,
     inplace: bool = False,
     activation: str = "silu",
+    is_act_and_mul: bool = True,
     apply_router_weight_on_input: bool = False,
     use_fp8_w8a8: bool = False,
     use_int8_w8a8: bool = False,
@@ -1503,14 +1513,21 @@ def fused_experts_impl(
                                 per_channel_quant=per_channel_quant,
                                 block_shape=block_shape)
 
-        if activation == "silu":
+        # Activation function with multiplication
+        if activation == "silu" and is_act_and_mul:
             torch.ops._C.silu_and_mul(intermediate_cache2,
                                       intermediate_cache1.view(-1, N))
-        elif activation == "gelu":
+        elif activation == "gelu" and is_act_and_mul:
             torch.ops._C.gelu_and_mul(intermediate_cache2,
                                       intermediate_cache1.view(-1, N))
+        # Activation function without multiplication
+        elif activation == "silu":
+            intermediate_cache2 = F.silu(intermediate_cache1.view(-1, N))
+        elif activation == "gelu":
+            intermediate_cache2 = F.gelu(intermediate_cache1.view(-1, N))
         else:
-            raise ValueError(f"Unsupported FusedMoe activation: {activation}")
+            raise ValueError(f"Unsupported FusedMoe activation: {activation}, "
+                             f"with is_act_and_mul={is_act_and_mul}.")
 
         qintermediate_cache2, a2q_scale = moe_kernel_quantize_input(
             A=intermediate_cache2,
@@ -1555,6 +1572,7 @@ def fused_moe(
     renormalize: bool,
     inplace: bool = False,
     activation: str = "silu",
+    is_act_and_mul: bool = True,
     use_grouped_topk: bool = False,
     num_expert_group: Optional[int] = None,
     topk_group: Optional[int] = None,
@@ -1591,6 +1609,9 @@ def fused_moe(
         Defaults to False.
     - activation (str): The activation function to apply after the first
         MoE layer.
+    - is_act_and_mul (bool): If True, apply the activation-and-mul function
+        (self-gated activation); otherwise apply the plain activation
+        function without the multiplication (ungated activation).
     - num_expert_group: Optional[int]: additional parameter for grouped_topk
     - topk_group: Optional[int]: additional parameter for grouped_topk
     - use_grouped_topk: If True, use grouped_topk instead of fused_topk
@@ -1627,6 +1648,9 @@ def fused_moe(
     Returns:
     - torch.Tensor: The output tensor after applying the MoE layer.
     """
+    if not is_act_and_mul:
+        assert inplace is False, (
+            "is_act_and_mul=False is not supported with inplace=True")
 
     if use_grouped_topk:
         assert num_expert_group is not None and topk_group is not None
@@ -1647,6 +1671,7 @@ def fused_moe(
                          topk_ids,
                          inplace=inplace,
                          activation=activation,
+                         is_act_and_mul=is_act_and_mul,
                          use_fp8_w8a8=use_fp8_w8a8,
                          use_int8_w8a8=use_int8_w8a8,
                          use_int8_w8a16=use_int8_w8a16,
diff --git a/vllm/model_executor/models/bert_with_rope.py b/vllm/model_executor/models/bert_with_rope.py
index 0b7350f07d3f6..5249acbd84a56 100644
--- a/vllm/model_executor/models/bert_with_rope.py
+++ b/vllm/model_executor/models/bert_with_rope.py
@@ -10,9 +10,12 @@ from transformers import PretrainedConfig
 from vllm.attention import Attention, AttentionType
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, VllmConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
+from vllm.distributed import (divide, get_tensor_model_parallel_rank,
+                              get_tensor_model_parallel_world_size,
+                              tensor_model_parallel_all_reduce)
 from vllm.model_executor.layers.activation import (get_act_and_mul_fn,
                                                    get_act_fn)
+from vllm.model_executor.layers.fused_moe import fused_moe
 from vllm.model_executor.layers.linear import (ColumnParallelLinear,
                                                MergedColumnParallelLinear,
                                                QKVParallelLinear,
@@ -26,6 +29,8 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models import SupportsV0Only
 from vllm.model_executor.models.interfaces import SupportsQuant
 from vllm.model_executor.models.utils import WeightsMapper
+from vllm.model_executor.utils import set_weight_attrs
+from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
 
 
@@ -201,114 +206,101 @@ class BertWithRopeMLP(nn.Module):
         return hidden_states
 
 
-class NomicRouter(nn.Module):
+class NomicMoE(nn.Module):
 
-    def __init__(self, hidden_size: int, moe_num_experts: int, moe_top_k: int):
+    def __init__(
+        self,
+        num_experts: int,
+        top_k: int,
+        hidden_size: int,
+        intermediate_size: int,
+        hidden_act: str,
+        params_dtype: Optional[torch.dtype] = None,
+        tp_size: Optional[int] = None,
+    ):
         super().__init__()
-        self.moe_top_k = moe_top_k
-        self.layer = ReplicatedLinear(hidden_size, moe_num_experts, bias=False)
 
-    def forward(
-        self, x: torch.Tensor
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.LongTensor]:
-        weights = self.layer(x.view(-1, x.shape[-1]))[0].softmax(
-            dim=-1, dtype=torch.float32)
-        top_weights, top_experts = torch.topk(weights, self.moe_top_k, dim=-1)
-        weights = weights.to(x.dtype)
-        top_weights = top_weights.to(x.dtype)
-        return weights, top_weights, top_experts  # type: ignore
-
-
-class NomicExpertMLP(nn.Module):
-
-    def __init__(self, hidden_size: int, ffn_hidden_size: int,
-                 moe_num_experts: int, ffn_act_fn: str):
-        super().__init__()
+        self.tp_size = tp_size or get_tensor_model_parallel_world_size()
+        self.num_total_experts = num_experts
+        self.top_k = top_k
         self.hidden_size = hidden_size
-        self.ffn_hidden_size = ffn_hidden_size
-        self.moe_num_experts = moe_num_experts
+        self.total_intermediate_size = intermediate_size
+        self.intermediate_size = divide(intermediate_size, self.tp_size)
+        self.hidden_act = hidden_act
 
+        if params_dtype is None:
+            params_dtype = torch.get_default_dtype()
+        self.params_dtype = params_dtype
+
+        self.router = ReplicatedLinear(self.hidden_size,
+                                       self.num_total_experts,
+                                       bias=False)
         self.w1 = nn.Parameter(
-            torch.empty(moe_num_experts * ffn_hidden_size, hidden_size))
+            torch.empty(self.num_total_experts,
+                        self.intermediate_size,
+                        self.hidden_size,
+                        device=current_platform.device_type,
+                        dtype=self.params_dtype))
         self.w2 = nn.Parameter(
-            torch.empty(moe_num_experts * ffn_hidden_size, hidden_size))
-        self.activation_fn = get_act_fn(ffn_act_fn)
+            torch.empty(self.num_total_experts,
+                        self.hidden_size,
+                        self.intermediate_size,
+                        device=current_platform.device_type,
+                        dtype=self.params_dtype))
+        self.bias = nn.Parameter(torch.zeros(self.hidden_size))
+        set_weight_attrs(self.w1, {
+            "weight_loader": self.weight_loader,
+        })
+        set_weight_attrs(self.w2, {
+            "weight_loader": self.weight_loader,
+        })
 
-    def forward(self, x: torch.Tensor, expert_idx: int) -> torch.Tensor:
-        expert_w1 = self.w1.view(self.moe_num_experts, self.ffn_hidden_size,
-                                 self.hidden_size)[expert_idx]
-        expert_w2 = self.w2.view(self.moe_num_experts, self.ffn_hidden_size,
-                                 self.hidden_size)[expert_idx]
+    def weight_loader(
+        self,
+        param: nn.Parameter,
+        loaded_weight: torch.Tensor,
+        weight_name: str,
+    ):
+        # NOTE: Nomic-MoE has fused experts weights with shape
+        # (num_experts * intermediate_size, hidden_size)
+        tp_rank = get_tensor_model_parallel_rank()
+        param_data = param.data
+        shard_size = self.intermediate_size
+        shard = slice(tp_rank * shard_size, (tp_rank + 1) * shard_size)
+        if weight_name.endswith("w1"):
+            loaded_weight = loaded_weight.reshape(
+                self.num_total_experts,
+                self.total_intermediate_size,
+                self.hidden_size,
+            )[:, shard]
+        if weight_name.endswith("w2"):
+            loaded_weight = loaded_weight.reshape(
+                self.num_total_experts,
+                self.total_intermediate_size,
+                self.hidden_size,
+            )[:, shard].transpose(1, 2)
+        param_data.copy_(loaded_weight)
 
-        x1 = x.matmul(expert_w1.t())
-        act_out = self.activation_fn(x1)
-        x2 = act_out.matmul(expert_w2)
-        return x2
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        num_tokens, hidden_size = hidden_states.shape
+        hidden_states = hidden_states.view(-1, self.hidden_size)
+        # router_logits: (num_tokens, n_experts)
+        router_logits, _ = self.router(hidden_states)
+        final_hidden_states = fused_moe(hidden_states,
+                                        self.w1,
+                                        self.w2,
+                                        router_logits,
+                                        self.top_k,
+                                        renormalize=False,
+                                        inplace=False,
+                                        activation=self.hidden_act,
+                                        is_act_and_mul=False)
 
+        if self.tp_size > 1:
+            final_hidden_states = tensor_model_parallel_all_reduce(
+                final_hidden_states)
 
-class NomicExperts(nn.Module):
-
-    def __init__(self, config, hidden_size: int, ffn_hidden_size: int,
-                 moe_num_experts: int):
-        super().__init__()
-        self.moe_num_experts = moe_num_experts
-
-        self.mlp = NomicExpertMLP(hidden_size=config.n_embd,
-                                  ffn_hidden_size=config.n_inner,
-                                  moe_num_experts=moe_num_experts,
-                                  ffn_act_fn=config.hidden_act)
-        self.bias = nn.Parameter(torch.zeros(config.n_embd))
-
-    def forward(self, x: torch.Tensor, weights: torch.Tensor,
-                top_weights: torch.Tensor,
-                top_experts: torch.LongTensor) -> torch.Tensor:
-        q_len, hidden_size = x.shape
-        x = x.view(-1, hidden_size)
-        out = torch.zeros_like(x)
-
-        expert_mask = nn.functional.one_hot(
-            top_experts, num_classes=self.moe_num_experts).permute(2, 1, 0)
-        for expert_idx in range(0, self.moe_num_experts):
-            topk_idx, token_idx = torch.where(expert_mask[expert_idx])
-            if token_idx.shape[0] == 0:
-                continue
-
-            token_list = token_idx.tolist()
-            topk_list = topk_idx.tolist()
-
-            expert_tokens = x[None, token_list].reshape(-1, hidden_size)
-            expert_out = self.mlp(
-                expert_tokens, expert_idx) * top_weights[token_list, topk_list,
-                                                         None]
-
-            out.index_add_(0, token_idx, expert_out)
-
-        out = out.reshape(q_len, hidden_size)
-        return out + self.bias
-
-
-class NomicMoELayer(nn.Module):
-
-    def __init__(self, config: PretrainedConfig):
-        super().__init__()
-
-        self.router = NomicRouter(
-            config.n_embd,
-            moe_num_experts=config.num_experts,
-            moe_top_k=config.moe_top_k,
-        )
-
-        self.experts = NomicExperts(
-            config,
-            hidden_size=config.n_embd,
-            ffn_hidden_size=config.n_inner,
-            moe_num_experts=config.num_experts,
-        )
-
-    def forward(self, x: torch.Tensor):
-        weights, top_weights, top_experts = self.router(x)
-        out = self.experts(x, weights, top_weights, top_experts)
-        return out
+        return final_hidden_states.view(num_tokens, hidden_size) + self.bias
 
 
 class BertWithRopeBlock(nn.Module):
@@ -332,7 +324,11 @@ class BertWithRopeBlock(nn.Module):
             prefix=f"{prefix}.attention")
 
         if moe:
-            self.mlp = NomicMoELayer(config=config, )
+            self.mlp = NomicMoE(num_experts=config.num_experts,
+                                top_k=config.moe_top_k,
+                                hidden_size=config.hidden_size,
+                                intermediate_size=config.intermediate_size,
+                                hidden_act=config.hidden_act)
         else:
             if config.hidden_act in ["silu", "geglu"]:
                 self.mlp = BertWithRopeGatedMLP(
@@ -463,7 +459,11 @@ class BertWithRope(nn.Module, SupportsV0Only, SupportsQuant):
                 param = params_dict[name]
                 weight_loader = getattr(param, "weight_loader",
                                         default_weight_loader)
-                weight_loader(param, loaded_weight)
+                if name.endswith((".w1", ".w2")):
+                    # Nomic-MoE has fused experts weights
+                    weight_loader(param, loaded_weight, name)
+                else:
+                    weight_loader(param, loaded_weight)
             loaded_params.add(name)
         return loaded_params
 
@@ -481,6 +481,10 @@ class NomicBertModel(BertWithRope):
             "mlp.fc12": "mlp.gate_proj",
             "mlp.fc2": "mlp.down_proj",
             "norm2": "mlp_ln",
+            # MoE mapping
+            "experts.mlp.": "",
+            "experts.": "",
+            "router.layer": "router",
         })
 
 

From 37efc63b644b2f4e3b08bf7ff198dd8cd4c3f354 Mon Sep 17 00:00:00 2001
From: Reza Barazesh <3146276+rzabarazesh@users.noreply.github.com>
Date: Tue, 29 Jul 2025 03:15:30 -0700
Subject: [PATCH 010/224] [V0 deprecation] Guided decoding (#21347)

Signed-off-by: Reza Barazesh <rezabarazesh@meta.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .buildkite/test-pipeline.yaml                 |   3 +-
 .github/CODEOWNERS                            |   3 -
 .github/mergify.yml                           |   3 -
 tests/entrypoints/llm/test_guided_generate.py | 552 ------------------
 tests/entrypoints/llm/test_lazy_outlines.py   |  52 +-
 tests/entrypoints/openai/test_chat.py         | 151 +----
 tests/entrypoints/openai/test_completion.py   |  36 +-
 .../openai/test_prompt_validation.py          |  26 +-
 .../model_executor/test_guided_processors.py  | 207 -------
 .../language/generation/test_mistral.py       |  51 +-
 tests/samplers/test_no_bad_words.py           |   6 +-
 tests/test_sampling_params.py                 |   3 +-
 tests/v1/test_oracle.py                       |   7 -
 tools/check_pickle_imports.py                 |   1 -
 vllm/config.py                                |  20 +-
 vllm/engine/arg_utils.py                      |  24 +-
 vllm/engine/async_llm_engine.py               |  66 +--
 vllm/engine/llm_engine.py                     |  48 +-
 vllm/engine/multiprocessing/client.py         |  18 -
 vllm/entrypoints/llm.py                       |  76 +--
 .../guided_decoding/__init__.py               | 192 ------
 .../guided_decoding/guidance_decoding.py      |  63 --
 .../guidance_logits_processors.py             | 104 ----
 .../guided_decoding/guided_fields.py          |  41 --
 .../lm_format_enforcer_decoding.py            |  67 ---
 .../guided_decoding/outlines_decoding.py      | 117 ----
 .../outlines_logits_processors.py             | 307 ----------
 vllm/model_executor/guided_decoding/utils.py  | 242 --------
 .../guided_decoding/xgrammar_decoding.py      | 426 --------------
 29 files changed, 103 insertions(+), 2809 deletions(-)
 delete mode 100644 tests/entrypoints/llm/test_guided_generate.py
 delete mode 100644 tests/model_executor/test_guided_processors.py
 delete mode 100644 vllm/model_executor/guided_decoding/__init__.py
 delete mode 100644 vllm/model_executor/guided_decoding/guidance_decoding.py
 delete mode 100644 vllm/model_executor/guided_decoding/guidance_logits_processors.py
 delete mode 100644 vllm/model_executor/guided_decoding/guided_fields.py
 delete mode 100644 vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py
 delete mode 100644 vllm/model_executor/guided_decoding/outlines_decoding.py
 delete mode 100644 vllm/model_executor/guided_decoding/outlines_logits_processors.py
 delete mode 100644 vllm/model_executor/guided_decoding/utils.py
 delete mode 100644 vllm/model_executor/guided_decoding/xgrammar_decoding.py

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index ac145453dabde..6cda800b6477d 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -128,11 +128,10 @@ steps:
   - tests/entrypoints/offline_mode
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_guided_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
+  - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_collective_rpc.py
   - pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
-  - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
 - label: Entrypoints Test (API Server) # 40min
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 2441055371663..a3b2713430eb5 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -10,7 +10,6 @@
 /vllm/worker/worker.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill
 /vllm/model_executor/layers/sampler.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill
 /vllm/model_executor/layers/quantization @mgoin @robertgshaw2-redhat @tlrmchlsmth
-/vllm/model_executor/guided_decoding @mgoin @russellb @aarnphm
 /vllm/multimodal @DarkLight1337 @ywang96
 /vllm/vllm_flash_attn @LucasWilkinson
 /vllm/lora @jeejeelee
@@ -35,9 +34,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 /tests/distributed/test_pipeline_parallel.py @youkaichao
 /tests/distributed/test_same_node.py @youkaichao
 /tests/entrypoints @DarkLight1337 @robertgshaw2-redhat @simon-mo @aarnphm
-/tests/entrypoints/llm/test_guided_generate.py @mgoin @russellb @aarnphm
 /tests/kernels @tlrmchlsmth @WoosukKwon
-/tests/model_executor/test_guided_processors.py @mgoin @russellb
 /tests/models @DarkLight1337 @ywang96
 /tests/multi_step @alexm-redhat @comaniac
 /tests/multimodal @DarkLight1337 @ywang96
diff --git a/.github/mergify.yml b/.github/mergify.yml
index 5c878ac02069f..d8ae509e0ac30 100644
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -149,9 +149,6 @@ pull_request_rules:
       - files=examples/offline_inference/structured_outputs.py
       - files=examples/online_serving/openai_chat_completion_structured_outputs.py
       - files=examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
-      - files~=^vllm/model_executor/guided_decoding/
-      - files=tests/model_executor/test_guided_processors.py
-      - files=tests/entrypoints/llm/test_guided_generate.py
       - files~=^tests/v1/structured_output/
       - files=tests/v1/entrypoints/llm/test_guided_generate.py
       - files~=^vllm/v1/structured_output/
diff --git a/tests/entrypoints/llm/test_guided_generate.py b/tests/entrypoints/llm/test_guided_generate.py
deleted file mode 100644
index 55578341cb2e7..0000000000000
--- a/tests/entrypoints/llm/test_guided_generate.py
+++ /dev/null
@@ -1,552 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import json
-import weakref
-from enum import Enum
-
-import jsonschema
-import pytest
-import regex as re
-from pydantic import BaseModel
-
-from vllm.distributed import cleanup_dist_env_and_memory
-from vllm.entrypoints.llm import LLM
-from vllm.outputs import RequestOutput
-from vllm.sampling_params import GuidedDecodingParams, SamplingParams
-
-MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
-
-# Separate backends which support grammars vs ones
-# which only support regex based constraints in tests.
-GRAMMAR_DECODING_BACKENDS = [
-    # (backend, disable_any_whitespace),
-    ("lm-format-enforcer", False),
-    ("xgrammar", True),
-    ("guidance", True),
-]
-
-ALL_DECODING_BACKENDS = ([("outlines", False)] + GRAMMAR_DECODING_BACKENDS)
-
-
-@pytest.fixture(scope="module")
-def llm():
-    # pytest caches the fixture so we use weakref.proxy to
-    # enable garbage collection
-    llm = LLM(model=MODEL_NAME, max_model_len=1024, seed=0)
-
-    with llm.deprecate_legacy_api():
-        yield weakref.proxy(llm)
-        del llm
-    cleanup_dist_env_and_memory()
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         ALL_DECODING_BACKENDS)
-def test_guided_regex(sample_regex, llm, guided_decoding_backend: str,
-                      disable_any_whitespace: bool):
-    sampling_params = SamplingParams(
-        temperature=0.8,
-        top_p=0.95,
-        guided_decoding=GuidedDecodingParams(
-            regex=sample_regex,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-
-    outputs = llm.generate(prompts=[
-        f"Give an example IPv4 address with this regex: {sample_regex}"
-    ] * 2,
-                           sampling_params=sampling_params,
-                           use_tqdm=True)
-
-    assert outputs is not None
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-        generated_text = output.outputs[0].text
-        print(generated_text)
-        assert generated_text is not None
-        assert re.fullmatch(sample_regex, generated_text) is not None
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         ALL_DECODING_BACKENDS)
-def test_guided_json_completion(sample_json_schema, llm,
-                                guided_decoding_backend: str,
-                                disable_any_whitespace: bool):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(
-            json=sample_json_schema,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-    outputs = llm.generate(prompts=[
-        f"Give an example JSON for an employee profile "
-        f"that fits this schema: {sample_json_schema}"
-    ] * 2,
-                           sampling_params=sampling_params,
-                           use_tqdm=True)
-
-    assert outputs is not None
-
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-
-        generated_text = output.outputs[0].text
-        assert generated_text is not None
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-        output_json = json.loads(generated_text)
-        jsonschema.validate(instance=output_json, schema=sample_json_schema)
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         ALL_DECODING_BACKENDS)
-def test_guided_complex_json_completion(sample_complex_json_schema, llm,
-                                        guided_decoding_backend: str,
-                                        disable_any_whitespace: bool):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(
-            json=sample_complex_json_schema,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-    outputs = llm.generate(prompts=[
-        f"Give an example JSON for an assignment grade "
-        f"that fits this schema: {sample_complex_json_schema}"
-    ] * 2,
-                           sampling_params=sampling_params,
-                           use_tqdm=True)
-
-    assert outputs is not None
-
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-
-        generated_text = output.outputs[0].text
-        assert generated_text is not None
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-        output_json = json.loads(generated_text)
-        jsonschema.validate(instance=output_json,
-                            schema=sample_complex_json_schema)
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         ALL_DECODING_BACKENDS)
-def test_guided_definition_json_completion(sample_definition_json_schema, llm,
-                                           guided_decoding_backend: str,
-                                           disable_any_whitespace: bool):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(
-            json=sample_definition_json_schema,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-    outputs = llm.generate(prompts=[
-        f"Give an example JSON for solving 8x + 7 = -23 "
-        f"that fits this schema: {sample_definition_json_schema}"
-    ] * 2,
-                           sampling_params=sampling_params,
-                           use_tqdm=True)
-
-    assert outputs is not None
-
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-
-        generated_text = output.outputs[0].text
-        assert generated_text is not None
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-        output_json = json.loads(generated_text)
-        jsonschema.validate(instance=output_json,
-                            schema=sample_definition_json_schema)
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         ALL_DECODING_BACKENDS)
-def test_guided_enum_json_completion(sample_enum_json_schema, llm,
-                                     guided_decoding_backend: str,
-                                     disable_any_whitespace: bool):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(
-            json=sample_enum_json_schema,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-    outputs = llm.generate(prompts=[
-        "Create a bug report JSON that fits this schema: "
-        f"{sample_enum_json_schema}. Make it for a high priority critical bug."
-    ] * 2,
-                           sampling_params=sampling_params,
-                           use_tqdm=True)
-
-    assert outputs is not None
-
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-
-        generated_text = output.outputs[0].text
-        assert generated_text is not None
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-        output_json = json.loads(generated_text)
-        jsonschema.validate(instance=output_json,
-                            schema=sample_enum_json_schema)
-
-        # Additional assertions to verify enum values
-        assert output_json["status"] in ["active", "inactive", "pending"]
-        assert output_json["priority"] in ["low", "medium", "high", "critical"]
-        assert output_json["category"]["type"] in [
-            "bug", "feature", "improvement"
-        ]
-        assert output_json["category"]["severity"] in [1, 2, 3, 4, 5]
-        for flag in output_json["flags"]:
-            assert flag in ["urgent", "blocked", "needs_review", "approved"]
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         ALL_DECODING_BACKENDS)
-def test_guided_choice_completion(sample_guided_choice, llm,
-                                  guided_decoding_backend: str,
-                                  disable_any_whitespace: bool):
-    sampling_params = SamplingParams(
-        temperature=0.8,
-        top_p=0.95,
-        guided_decoding=GuidedDecodingParams(
-            choice=sample_guided_choice,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-    outputs = llm.generate(
-        prompts="The best language for type-safe systems programming is ",
-        sampling_params=sampling_params,
-        use_tqdm=True)
-
-    assert outputs is not None
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-        generated_text = output.outputs[0].text
-        print(generated_text)
-        assert generated_text is not None
-        assert generated_text in sample_guided_choice
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GRAMMAR_DECODING_BACKENDS)
-def test_guided_grammar(sample_sql_statements, llm,
-                        guided_decoding_backend: str,
-                        disable_any_whitespace: bool):
-    sampling_params = SamplingParams(
-        temperature=0.8,
-        top_p=0.95,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(
-            grammar=sample_sql_statements,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-    outputs = llm.generate(
-        prompts=("Generate a sql state that select col_1 from "
-                 "table_1 where it is equals to 1"),
-        sampling_params=sampling_params,
-        use_tqdm=True,
-    )
-
-    assert outputs is not None
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-
-        generated_text = output.outputs[0].text
-        assert generated_text is not None
-        # use Lark to parse the output, and make sure it's a valid parse tree
-        from lark import Lark
-        parser = Lark(sample_sql_statements)
-        parser.parse(generated_text)
-
-        # remove spaces for comparison b/c we removed them in the grammar
-        ground_truth = "SELECT col_1 from table_1 where col_1 = 1".replace(
-            " ", "")
-
-        assert generated_text.strip() == ground_truth
-
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-
-
-@pytest.mark.skip_global_cleanup
-def test_guided_options_request_deprecation_warning(sample_regex, llm):
-    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
-
-    with pytest.warns(DeprecationWarning, match="guided_options_request"):
-        llm.generate(prompts="This should fail",
-                     sampling_params=sampling_params,
-                     use_tqdm=True,
-                     guided_options_request=dict(guided_regex=sample_regex))
-
-
-@pytest.mark.skip_global_cleanup
-def test_validation_against_both_guided_decoding_options(sample_regex, llm):
-    sampling_params = SamplingParams(
-        temperature=0.8,
-        top_p=0.95,
-        guided_decoding=GuidedDecodingParams(regex=sample_regex))
-
-    with pytest.raises(ValueError, match="Cannot set both"):
-        llm.generate(prompts="This should fail",
-                     sampling_params=sampling_params,
-                     use_tqdm=True,
-                     guided_options_request=dict(guided_regex=sample_regex))
-
-
-@pytest.mark.skip_global_cleanup
-def test_disable_guided_decoding_fallback(sample_regex, llm):
-    # see has_xgrammar_unsupported_json_features()
-    unsupported_json = {
-        "type": "object",
-        "properties": {
-            "example": {
-                "type": "string",
-                "minLength": 5  # unsupported by xgrammar
-            }
-        }
-    }
-    sampling_params = SamplingParams(temperature=0.8,
-                                     top_p=0.95,
-                                     guided_decoding=GuidedDecodingParams(
-                                         json=unsupported_json,
-                                         backend="xgrammar",
-                                         disable_fallback=True))
-
-    with pytest.raises(
-            ValueError,
-            match="xgrammar does not support advanced JSON schema features "
-            "like string length, item limits, or property bounds."):
-        llm.generate(prompts="This should fail",
-                     sampling_params=sampling_params,
-                     use_tqdm=True)
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GRAMMAR_DECODING_BACKENDS)
-def test_guided_json_object(llm, guided_decoding_backend: str,
-                            disable_any_whitespace: bool):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=100,
-        n=2,
-        guided_decoding=GuidedDecodingParams(
-            json_object=True,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-
-    outputs = llm.generate(
-        prompts=("Generate a JSON object with curly braces for a person with "
-                 "name and age fields for John Smith who is 31 years old."),
-        sampling_params=sampling_params,
-        use_tqdm=True)
-
-    assert outputs is not None
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-
-        for i in range(2):
-            generated_text = output.outputs[i].text
-            print(generated_text)
-            assert generated_text is not None
-
-            if disable_any_whitespace:
-                assert "\n" not in generated_text
-
-            # Parse to verify it is valid JSON
-            parsed_json = json.loads(generated_text)
-            # A list is not what was intended, but is still valid
-            # json.
-            assert isinstance(parsed_json, (dict, list))
-
-
-class CarType(str, Enum):
-    sedan = "sedan"
-    suv = "SUV"
-    truck = "Truck"
-    coupe = "Coupe"
-
-
-class CarDescription(BaseModel):
-    brand: str
-    model: str
-    car_type: CarType
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         ALL_DECODING_BACKENDS)
-def test_guided_json_completion_with_enum(llm, guided_decoding_backend: str,
-                                          disable_any_whitespace: bool):
-    json_schema = CarDescription.model_json_schema()
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(
-            json=json_schema,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace))
-    outputs = llm.generate(
-        prompts="Generate a JSON with the brand, model and car_type of"
-        "the most iconic car from the 90's",
-        sampling_params=sampling_params,
-        use_tqdm=True)
-
-    assert outputs is not None
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-
-        generated_text = output.outputs[0].text
-        assert generated_text is not None
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-        output_json = json.loads(generated_text)
-        jsonschema.validate(instance=output_json, schema=json_schema)
-
-
-@pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         ALL_DECODING_BACKENDS)
-def test_guided_number_range_json_completion(llm, guided_decoding_backend: str,
-                                             disable_any_whitespace: bool):
-    sample_output_schema = {
-        "type": "object",
-        "properties": {
-            "age": {
-                "type": "integer",
-                "minimum": 18,
-                "maximum": 99
-            },
-            "score": {
-                "type": "number",
-                "minimum": 0.0,
-                "maximum": 100.0
-            },
-            "zipcode": {
-                "type": "string",
-                "pattern": r"^\d{5}(-\d{4})?$"
-            },
-        },
-        "required": ["age", "score", "zipcode"],
-    }
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(
-            json=sample_output_schema,
-            backend=guided_decoding_backend,
-            disable_any_whitespace=disable_any_whitespace),
-    )
-    outputs = llm.generate(
-        prompts=[
-            "Create a JSON object for a user with age, score, and zipcode."
-        ] * 2,
-        sampling_params=sampling_params,
-        use_tqdm=True,
-    )
-
-    assert outputs is not None
-
-    for output in outputs:
-        assert output is not None
-        assert isinstance(output, RequestOutput)
-        prompt = output.prompt
-
-        generated_text = output.outputs[0].text
-        assert generated_text is not None
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-        output_json = json.loads(generated_text)
-        jsonschema.validate(instance=output_json, schema=sample_output_schema)
-        assert 18 <= output_json["age"] <= 99
-        assert 0.0 <= output_json["score"] <= 100.0
-        assert (re.fullmatch(r"^\d{5}(-\d{4})?$", output_json["zipcode"])
-                is not None)
-
-
-@pytest.mark.skip_global_cleanup
-def test_guidance_no_additional_properties(llm):
-    schema = {
-        'type': 'object',
-        'properties': {
-            'a1': {
-                'type': 'string'
-            },
-            'a2': {
-                'type': 'string'
-            },
-            'a3': {
-                'type': 'string'
-            }
-        },
-        'required': ['a1', 'a2', 'a3'],
-    }
-
-    prompt = (
-        "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a "
-        "helpful assistant.<|im_end|>\n<|im_start|>user\nPlease generate a "
-        "large JSON object with key-value pairs a1=b1, a2=b2, ..., a20=b20"
-        "<|im_end|>\n<|im_start|>assistant\n")
-
-    def generate_with_backend(backend, disable_additional_properties):
-        guided_params = GuidedDecodingParams(
-            json=schema,
-            backend=backend,
-            disable_any_whitespace=True,
-            disable_additional_properties=disable_additional_properties)
-        sampling_params = SamplingParams(temperature=0,
-                                         max_tokens=256,
-                                         guided_decoding=guided_params)
-
-        outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)
-        assert outputs is not None
-        generated_text = outputs[0].outputs[0].text
-        assert generated_text is not None
-        parsed_json = json.loads(generated_text)
-        assert isinstance(parsed_json, dict)
-        jsonschema.validate(instance=parsed_json, schema=schema)
-        return parsed_json
-
-    base_generated = generate_with_backend("guidance", False)
-    assert "a1" in base_generated
-    assert "a2" in base_generated
-    assert "a3" in base_generated
-    # by default additional keys are generated
-    assert "a4" in base_generated
-    assert "a5" in base_generated
-    assert "a6" in base_generated
-
-    generated = generate_with_backend("guidance", True)
-    assert "a1" in generated
-    assert "a2" in generated
-    assert "a3" in generated
-    assert "a4" not in generated
-    assert "a5" not in generated
-    assert "a6" not in generated
diff --git a/tests/entrypoints/llm/test_lazy_outlines.py b/tests/entrypoints/llm/test_lazy_outlines.py
index 61b6b4fbf8e35..ac0b7e134c55a 100644
--- a/tests/entrypoints/llm/test_lazy_outlines.py
+++ b/tests/entrypoints/llm/test_lazy_outlines.py
@@ -4,43 +4,11 @@
 import sys
 from contextlib import nullcontext
 
-import pytest
 from vllm_test_utils import BlameResult, blame
 
 from vllm import LLM, SamplingParams
 from vllm.distributed import cleanup_dist_env_and_memory
-
-
-@pytest.fixture(scope="function", autouse=True)
-def use_v0_only(monkeypatch):
-    """
-    V1 only supports xgrammar so this is irrelevant.
-    """
-    monkeypatch.setenv('VLLM_USE_V1', '0')
-
-
-def run_normal_opt125m():
-    prompts = [
-        "Hello, my name is",
-        "The president of the United States is",
-        "The capital of France is",
-        "The future of AI is",
-    ]
-    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
-
-    # Create an LLM without guided decoding as a baseline.
-    llm = LLM(model="facebook/opt-125m",
-              enforce_eager=True,
-              gpu_memory_utilization=0.3)
-    outputs = llm.generate(prompts, sampling_params)
-    for output in outputs:
-        prompt = output.prompt
-        generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-
-    # Destroy the LLM object and free up the GPU memory.
-    del llm
-    cleanup_dist_env_and_memory()
+from vllm.sampling_params import GuidedDecodingParams
 
 
 def run_normal():
@@ -67,20 +35,22 @@ def run_normal():
     cleanup_dist_env_and_memory()
 
 
-def run_lmfe(sample_regex):
+def run_xgrammar(sample_regex):
     # Create an LLM with guided decoding enabled.
     llm = LLM(model="distilbert/distilgpt2",
               enforce_eager=True,
-              guided_decoding_backend="lm-format-enforcer",
+              guided_decoding_backend="xgrammar",
               gpu_memory_utilization=0.3)
-    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+    prompt = f"Give an example IPv4 address with this regex: {sample_regex}"
+    guided_decoding = GuidedDecodingParams(regex=sample_regex)
+    sampling_params = SamplingParams(temperature=0.8,
+                                     top_p=0.95,
+                                     guided_decoding=guided_decoding)
     outputs = llm.generate(
-        prompts=[
-            f"Give an example IPv4 address with this regex: {sample_regex}"
-        ] * 2,
+        prompts=[prompt] * 2,
         sampling_params=sampling_params,
         use_tqdm=True,
-        guided_options_request=dict(guided_regex=sample_regex))
+    )
 
     for output in outputs:
         prompt = output.prompt
@@ -103,7 +73,7 @@ def test_lazy_outlines(sample_regex):
         lambda: module_name in sys.modules) if use_blame else nullcontext()
     with context as result:
         run_normal()
-        run_lmfe(sample_regex)
+        run_xgrammar(sample_regex)
     if use_blame:
         assert isinstance(result, BlameResult)
         print(f"the first import location is:\n{result.trace_stack}")
diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py
index e7c3ffaa6a9f2..5ad29d70f10df 100644
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -488,7 +488,9 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 async def test_guided_choice_chat(client: openai.AsyncOpenAI,
-                                  sample_guided_choice):
+                                  sample_guided_choice, is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Guided decoding is only supported in v1 engine")
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -524,8 +526,10 @@ async def test_guided_choice_chat(client: openai.AsyncOpenAI,
 
 
 @pytest.mark.asyncio
-async def test_guided_json_chat(client: openai.AsyncOpenAI,
-                                sample_json_schema):
+async def test_guided_json_chat(client: openai.AsyncOpenAI, sample_json_schema,
+                                is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Guided decoding is only supported in v1 engine")
 
     messages = [{
         "role": "system",
@@ -568,7 +572,10 @@ async def test_guided_json_chat(client: openai.AsyncOpenAI,
 
 
 @pytest.mark.asyncio
-async def test_guided_regex_chat(client: openai.AsyncOpenAI, sample_regex):
+async def test_guided_regex_chat(client: openai.AsyncOpenAI, sample_regex,
+                                 is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Guided decoding is only supported in v1 engine")
 
     messages = [{
         "role": "system",
@@ -653,7 +660,10 @@ async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
 
 
 @pytest.mark.asyncio
-async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema):
+async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema,
+                              is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Tool use is only supported in v1 engine")
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -741,131 +751,6 @@ async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema):
     assert json1["age"] != json2["age"]
 
 
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_required_tool_use(client: openai.AsyncOpenAI,
-                                 is_v1_server: bool, model_name: str):
-    if is_v1_server:
-        pytest.skip(
-            "tool_choice='required' requires features unsupported on V1")
-
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "city": {
-                            "type": "string",
-                            "description":
-                            "The city to find the weather for, e.g. 'Vienna'",
-                            "default": "Vienna",
-                        },
-                        "country": {
-                            "type":
-                            "string",
-                            "description":
-                            "The country that the city is in, e.g. 'Austria'",
-                        },
-                        "unit": {
-                            "type": "string",
-                            "description":
-                            "The unit to fetch the temperature in",
-                            "enum": ["celsius", "fahrenheit"],
-                        },
-                    },
-                    "required": ["country", "unit"],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "get_forecast",
-                "description": "Get the weather forecast for a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "city": {
-                            "type": "string",
-                            "description":
-                            "The city to get the forecast for, e.g. 'Vienna'",
-                            "default": "Vienna",
-                        },
-                        "country": {
-                            "type":
-                            "string",
-                            "description":
-                            "The country that the city is in, e.g. 'Austria'",
-                        },
-                        "days": {
-                            "type":
-                            "integer",
-                            "description":
-                            "Number of days to get the forecast for (1-7)",
-                        },
-                        "unit": {
-                            "type": "string",
-                            "description":
-                            "The unit to fetch the temperature in",
-                            "enum": ["celsius", "fahrenheit"],
-                        },
-                    },
-                    "required": ["country", "days", "unit"],
-                },
-            },
-        },
-    ]
-
-    messages = [
-        {
-            "role": "user",
-            "content": "Hi! How are you doing today?"
-        },
-        {
-            "role": "assistant",
-            "content": "I'm doing well! How can I help you?"
-        },
-        {
-            "role":
-            "user",
-            "content":
-            "Can you tell me what the current weather is in Berlin and the "\
-            "forecast for the next 5 days, in fahrenheit?",
-        },
-    ]
-
-    # Non-streaming test
-    chat_completion = await client.chat.completions.create(
-        messages=messages,
-        model=model_name,
-        tools=tools,
-        tool_choice="required",
-    )
-
-    assert chat_completion.choices[0].message.tool_calls is not None
-    assert len(chat_completion.choices[0].message.tool_calls) > 0
-
-    # Streaming test
-    stream = await client.chat.completions.create(
-        messages=messages,
-        model=model_name,
-        tools=tools,
-        tool_choice="required",
-        stream=True,
-    )
-
-    output = []
-    async for chunk in stream:
-        if chunk.choices and chunk.choices[0].delta.tool_calls:
-            output.extend(chunk.choices[0].delta.tool_calls)
-
-    assert len(output) > 0
-
-
 @pytest.mark.asyncio
 async def test_inconsistent_tool_choice_and_tools(client: openai.AsyncOpenAI,
                                                   sample_json_schema):
@@ -948,7 +833,11 @@ async def test_response_format_json_object(client: openai.AsyncOpenAI):
 
 
 @pytest.mark.asyncio
-async def test_response_format_json_schema(client: openai.AsyncOpenAI):
+async def test_response_format_json_schema(client: openai.AsyncOpenAI,
+                                           is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip(
+            "JSON schema response format is only supported in v1 engine")
     prompt = 'what is 1+1? The format is "result": 2'
     # Check that this prompt cannot lead to a valid JSON without json_schema
     for _ in range(2):
diff --git a/tests/entrypoints/openai/test_completion.py b/tests/entrypoints/openai/test_completion.py
index 6eca3e767f3f0..74ef6deeea16b 100644
--- a/tests/entrypoints/openai/test_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -28,7 +28,7 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # but we're not testing generation quality here
 LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
-GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]
+GUIDED_DECODING_BACKENDS = ["outlines", "xgrammar", "guidance"]
 
 
 @pytest.fixture(scope="module")
@@ -95,6 +95,14 @@ def server(default_server_args, request):
             os.environ['VLLM_USE_V1'] = original_value
 
 
+@pytest.fixture
+def is_v1_server(server):
+    import os
+
+    # Treat the server as V0 unless VLLM_USE_V1 is explicitly set to '1'
+    return os.environ.get('VLLM_USE_V1', '0') == '1'
+
+
 @pytest_asyncio.fixture
 async def client(server):
     async with server.get_async_client() as async_client:
@@ -631,7 +639,10 @@ async def test_allowed_token_ids(client: openai.AsyncOpenAI):
 @pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_json_completion(client: openai.AsyncOpenAI,
                                       guided_decoding_backend: str,
-                                      sample_json_schema):
+                                      sample_json_schema, is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Guided decoding is only supported in v1 engine")
+
     completion = await client.completions.create(
         model=MODEL_NAME,
         prompt=f"Give an example JSON for an employee profile "
@@ -653,7 +664,10 @@ async def test_guided_json_completion(client: openai.AsyncOpenAI,
 @pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_regex_completion(client: openai.AsyncOpenAI,
                                        guided_decoding_backend: str,
-                                       sample_regex):
+                                       sample_regex, is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Guided decoding is only supported in v1 engine")
+
     completion = await client.completions.create(
         model=MODEL_NAME,
         prompt=f"Give an example IPv4 address with this regex: {sample_regex}",
@@ -674,7 +688,11 @@ async def test_guided_regex_completion(client: openai.AsyncOpenAI,
 @pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_choice_completion(client: openai.AsyncOpenAI,
                                         guided_decoding_backend: str,
-                                        sample_guided_choice):
+                                        sample_guided_choice,
+                                        is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Guided decoding is only supported in v1 engine")
+
     completion = await client.completions.create(
         model=MODEL_NAME,
         prompt="The best language for type-safe systems programming is ",
@@ -692,7 +710,9 @@ async def test_guided_choice_completion(client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 async def test_guided_grammar(client: openai.AsyncOpenAI,
-                              sample_sql_statements):
+                              sample_sql_statements, is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Guided grammar is only supported in v1 engine")
 
     completion = await client.completions.create(
         model=MODEL_NAME,
@@ -754,7 +774,11 @@ async def test_echo_logprob_completion(client: openai.AsyncOpenAI,
 @pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
 async def test_guided_decoding_type_error(client: openai.AsyncOpenAI,
                                           guided_decoding_backend: str,
-                                          sample_json_schema, sample_regex):
+                                          sample_json_schema, sample_regex,
+                                          is_v1_server: bool):
+    if not is_v1_server:
+        pytest.skip("Guided decoding is only supported in v1 engine")
+
     with pytest.raises(openai.BadRequestError):
         _ = await client.completions.create(
             model=MODEL_NAME,
diff --git a/tests/entrypoints/openai/test_prompt_validation.py b/tests/entrypoints/openai/test_prompt_validation.py
index ff0730c77032c..e31a1d077608f 100644
--- a/tests/entrypoints/openai/test_prompt_validation.py
+++ b/tests/entrypoints/openai/test_prompt_validation.py
@@ -9,6 +9,11 @@ import regex as re
 from ...utils import RemoteOpenAIServer
 
 
+@pytest.fixture(scope="function", autouse=True)
+def use_v1_only(monkeypatch):
+    monkeypatch.setenv('VLLM_USE_V1', '1')
+
+
 @pytest.mark.asyncio
 async def test_empty_prompt():
     model_name = "gpt2"
@@ -37,24 +42,3 @@ async def test_out_of_vocab_token_ids():
                                             prompt=[999999],
                                             max_tokens=5,
                                             temperature=0.0)
-
-
-@pytest.mark.asyncio
-async def test_reject_multistep_with_guided_decoding():
-    model_name = "gpt2"
-    server_args = ["--enforce-eager", "--num-scheduler-steps", "8"]
-    with RemoteOpenAIServer(model_name, server_args) as remote_server:
-        client = remote_server.get_async_client()
-
-        with pytest.raises(
-                openai.BadRequestError,
-                match=re.compile(
-                    '.*Guided decoding .* multi-step decoding.*').pattern):
-            await client.completions.create(
-                model=model_name,
-                prompt="Hello",
-                max_tokens=5,
-                temperature=0.0,
-                extra_body={"response_format": {
-                    "type": "json_object"
-                }})
diff --git a/tests/model_executor/test_guided_processors.py b/tests/model_executor/test_guided_processors.py
deleted file mode 100644
index 2cf0ba2fe6866..0000000000000
--- a/tests/model_executor/test_guided_processors.py
+++ /dev/null
@@ -1,207 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import json
-import pickle
-
-import pytest
-import torch
-from transformers import AutoTokenizer
-
-from vllm.config import ModelConfig
-from vllm.model_executor.guided_decoding import (
-    get_guided_decoding_logits_processor,
-    get_local_guided_decoding_logits_processor)
-from vllm.model_executor.guided_decoding.outlines_logits_processors import (
-    JSONLogitsProcessor, RegexLogitsProcessor)
-from vllm.sampling_params import GuidedDecodingParams
-
-MODEL_NAME = 'HuggingFaceH4/zephyr-7b-beta'
-GUIDED_DECODING_BACKENDS = [
-    "outlines", "lm-format-enforcer", "xgrammar", "guidance"
-]
-GUIDED_DECODING_BACKENDS_WITH_REASONING_SUPPORT = ["outlines", "xgrammar"]
-REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
-
-
-# Initialize the tokenizer for the model here to avoid repeated loading
-@pytest.fixture(scope="module")
-def zephyr_7B_tokenzer():
-    return AutoTokenizer.from_pretrained(MODEL_NAME)
-
-
-@pytest.fixture(scope="module")
-def deepseek_r1_qwen_tokenizer():
-    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
-
-
-def test_guided_logits_processors(zephyr_7B_tokenzer, sample_regex,
-                                  sample_json_schema):
-    """Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor."""
-    regex_LP = RegexLogitsProcessor(sample_regex,
-                                    zephyr_7B_tokenzer,
-                                    reasoner=None)
-    json_LP = JSONLogitsProcessor(sample_json_schema,
-                                  zephyr_7B_tokenzer,
-                                  whitespace_pattern=None,
-                                  reasoner=None)
-
-    tensor = torch.rand(32000)
-    original_tensor = torch.clone(tensor)
-    tensor = regex_LP([], tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
-
-    tensor = torch.rand(32000)
-    original_tensor = torch.clone(tensor)
-    tensor = json_LP([], tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("backend", GUIDED_DECODING_BACKENDS)
-@pytest.mark.parametrize("is_local", [True, False])
-async def test_guided_logits_processor_black_box(backend: str, is_local: bool,
-                                                 sample_regex,
-                                                 sample_json_schema,
-                                                 zephyr_7B_tokenzer):
-
-    config = ModelConfig(
-        MODEL_NAME,
-        runner="generate",
-        seed=0,
-        dtype="bfloat16",
-    )
-    regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend)
-
-    regex_lp = get_local_guided_decoding_logits_processor(
-            regex_request, zephyr_7B_tokenzer, config) if is_local else \
-            await get_guided_decoding_logits_processor(
-                    regex_request, zephyr_7B_tokenzer, config)
-    assert regex_lp is not None
-    tensor = torch.rand(32000)
-    original_tensor = torch.clone(tensor)
-    # allowed tokens at state 0
-    tensor = regex_lp([], tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
-
-    json_request = GuidedDecodingParams(json=sample_json_schema,
-                                        backend=backend)
-    json_lp = await get_guided_decoding_logits_processor(
-        json_request, zephyr_7B_tokenzer, config)
-    assert json_lp is not None
-    tensor = torch.rand(32000)
-    original_tensor = torch.clone(tensor)
-    tensor = json_lp([], tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("backend",
-                         GUIDED_DECODING_BACKENDS_WITH_REASONING_SUPPORT)
-@pytest.mark.parametrize("is_local", [True, False])
-@pytest.mark.parametrize("reasoning_backend", ["deepseek_r1"])
-async def test_guided_logits_processor_with_reasoning(
-        backend: str, is_local: bool, reasoning_backend: str, sample_regex,
-        sample_json_schema, deepseek_r1_qwen_tokenizer):
-
-    config = ModelConfig(
-        REASONING_MODEL_NAME,
-        runner="generate",
-        seed=0,
-        dtype="bfloat16",
-    )
-    token_ids = deepseek_r1_qwen_tokenizer.encode(
-        "<think>here is the thinking process")
-    regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend)
-
-    regex_lp = get_local_guided_decoding_logits_processor(regex_request,
-                    deepseek_r1_qwen_tokenizer, config,
-                    reasoning_backend) if is_local else \
-            await get_guided_decoding_logits_processor(
-                    regex_request, deepseek_r1_qwen_tokenizer, config,
-                    reasoning_backend)
-    assert regex_lp is not None
-    tensor = torch.rand(151664)
-    original_tensor = torch.clone(tensor)
-    tensor = regex_lp(token_ids, tensor)
-    assert tensor.shape == original_tensor.shape
-    assert torch.allclose(tensor, original_tensor)
-
-    token_ids = deepseek_r1_qwen_tokenizer.encode(
-        "<think>here is the thinking process")
-    json_request = GuidedDecodingParams(json=sample_json_schema,
-                                        backend=backend)
-    json_lp = get_local_guided_decoding_logits_processor(
-        json_request, deepseek_r1_qwen_tokenizer, config,
-        reasoning_backend) if is_local else \
-        await get_guided_decoding_logits_processor(
-            json_request, deepseek_r1_qwen_tokenizer, config, reasoning_backend)
-    assert json_lp is not None
-    tensor = torch.rand(151664)
-    original_tensor = torch.clone(tensor)
-    tensor = json_lp(token_ids, tensor)
-    assert tensor.shape == original_tensor.shape
-    assert torch.allclose(tensor, original_tensor)
-
-    # Thinking is over, so the tensor should change.
-    token_ids = deepseek_r1_qwen_tokenizer.encode(
-        "<think>here is the thinking process</think>")
-    json_request = GuidedDecodingParams(json=sample_json_schema,
-                                        backend=backend)
-    json_lp = get_local_guided_decoding_logits_processor(
-        json_request, deepseek_r1_qwen_tokenizer, config,
-        reasoning_backend) if is_local else \
-        await get_guided_decoding_logits_processor(
-            json_request, deepseek_r1_qwen_tokenizer, config, reasoning_backend)
-    assert json_lp is not None
-    tensor = torch.rand(151664)
-    original_tensor = torch.clone(tensor)
-    tensor = json_lp(token_ids, tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
-
-
-def test_multiple_guided_options_not_allowed(sample_json_schema, sample_regex):
-    with pytest.raises(ValueError,
-                       match="You can only use one kind of guided"):
-        GuidedDecodingParams(json=sample_json_schema, regex=sample_regex)
-
-    with pytest.raises(ValueError,
-                       match="You can only use one kind of guided"):
-        GuidedDecodingParams(json=sample_json_schema, json_object=True)
-
-    with pytest.raises(ValueError,
-                       match="You can only use one kind of guided"):
-        GuidedDecodingParams(json=sample_json_schema, choice=["a", "b"])
-
-    with pytest.raises(ValueError,
-                       match="You can only use one kind of guided"):
-        GuidedDecodingParams(json=sample_json_schema, grammar="test grammar")
-
-
-def test_pickle_xgrammar_tokenizer_data():
-    try:
-        import xgrammar as xgr
-    except ImportError:
-        pytest.skip("Could not import xgrammar to run test")
-
-    from vllm.model_executor.guided_decoding.xgrammar_decoding import (
-        TokenizerData)
-    tokenizer_data = TokenizerData(
-        metadata=
-        '{"vocab_type":2,"vocab_size":151665,"add_prefix_space":false,"stop_token_ids":[151645]}',
-        encoded_vocab=['!', '"', '#', '$', '%'],
-    )
-    pickled = pickle.dumps(tokenizer_data)
-
-    assert pickled is not None
-
-    depickled: TokenizerData = pickle.loads(pickled)
-
-    assert depickled is not None
-    assert json.loads(
-        depickled.metadata)['vocab_type'] == xgr.VocabType.BYTE_LEVEL.value
diff --git a/tests/models/language/generation/test_mistral.py b/tests/models/language/generation/test_mistral.py
index 81a88f2d485eb..af51a60edfd62 100644
--- a/tests/models/language/generation/test_mistral.py
+++ b/tests/models/language/generation/test_mistral.py
@@ -3,13 +3,11 @@
 import copy
 import json
 
-import jsonschema
-import jsonschema.exceptions
 import pytest
 
 from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import (
     MistralToolCall, MistralToolParser)
-from vllm.sampling_params import GuidedDecodingParams, SamplingParams
+from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizer import MistralTokenizer
 
 from ...utils import check_logprobs_close
@@ -274,53 +272,6 @@ def test_mistral_function_calling(vllm_runner, model: str, dtype: str) -> None:
         assert parsed_message.content is None
 
 
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("guided_backend",
-                         ["outlines", "lm-format-enforcer", "xgrammar"])
-def test_mistral_guided_decoding(
-    monkeypatch: pytest.MonkeyPatch,
-    vllm_runner,
-    model: str,
-    guided_backend: str,
-) -> None:
-    with monkeypatch.context() as m:
-        # Guided JSON not supported in xgrammar + V1 yet
-        m.setenv("VLLM_USE_V1", "0")
-
-        with vllm_runner(
-                model,
-                dtype='bfloat16',
-                tokenizer_mode="mistral",
-                guided_decoding_backend=guided_backend,
-        ) as vllm_model:
-            guided_decoding = GuidedDecodingParams(json=SAMPLE_JSON_SCHEMA)
-            params = SamplingParams(max_tokens=512,
-                                    temperature=0.7,
-                                    guided_decoding=guided_decoding)
-
-            messages = [{
-                "role": "system",
-                "content": "you are a helpful assistant"
-            }, {
-                "role":
-                "user",
-                "content":
-                f"Give an example JSON for an employee profile that "
-                f"fits this schema: {SAMPLE_JSON_SCHEMA}"
-            }]
-            outputs = vllm_model.llm.chat(messages, sampling_params=params)
-
-        generated_text = outputs[0].outputs[0].text
-        json_response = json.loads(generated_text)
-        assert outputs is not None
-
-        try:
-            jsonschema.validate(instance=json_response,
-                                schema=SAMPLE_JSON_SCHEMA)
-        except jsonschema.exceptions.ValidationError:
-            pytest.fail("Generated response is not valid with JSON schema")
-
-
 def test_mistral_function_call_nested_json():
     """Ensure that the function-name regex captures the entire outer-most
     JSON block, including nested braces."""
diff --git a/tests/samplers/test_no_bad_words.py b/tests/samplers/test_no_bad_words.py
index 11803b8d7a5eb..128e8f552a161 100644
--- a/tests/samplers/test_no_bad_words.py
+++ b/tests/samplers/test_no_bad_words.py
@@ -14,9 +14,9 @@ from vllm import LLM, SamplingParams
 
 
 @pytest.fixture(autouse=True)
-def v1(run_with_both_engines):
-    """We can run both engines for this test."""
-    pass
+def v1(monkeypatch):
+    """Only run on vLLM v1."""
+    monkeypatch.setenv('VLLM_USE_V1', '1')
 
 
 def _generate(
diff --git a/tests/test_sampling_params.py b/tests/test_sampling_params.py
index be6427dd6bde5..7330f61e67689 100644
--- a/tests/test_sampling_params.py
+++ b/tests/test_sampling_params.py
@@ -56,8 +56,7 @@ def test_sampling_params_from_request_with_no_guided_decoding_backend(
 
 
 @pytest.mark.parametrize("request_level_guided_decoding_backend,expected",
-                         [("xgrammar", "xgrammar"),
-                          ("lm-format-enforcer", "lm-format-enforcer"),
+                         [("xgrammar", "xgrammar"), ("guidance", "guidance"),
                           ("outlines", "outlines")])
 def test_sampling_params_from_request_with_guided_decoding_backend(
         request_level_guided_decoding_backend: str, expected: str,
diff --git a/tests/v1/test_oracle.py b/tests/v1/test_oracle.py
index cc59287a9fbe6..b68ed298a1895 100644
--- a/tests/v1/test_oracle.py
+++ b/tests/v1/test_oracle.py
@@ -47,13 +47,6 @@ def test_unsupported_configs(monkeypatch):
                 },
             ).create_engine_config()
 
-        with pytest.raises(NotImplementedError):
-            AsyncEngineArgs(
-                model=MODEL,
-                guided_decoding_backend="lm-format-enforcer",
-                guided_decoding_disable_fallback=True,
-            ).create_engine_config()
-
         with pytest.raises(NotImplementedError):
             AsyncEngineArgs(
                 model=MODEL,
diff --git a/tools/check_pickle_imports.py b/tools/check_pickle_imports.py
index ef197d1fbace1..5e99dc63ebe0c 100644
--- a/tools/check_pickle_imports.py
+++ b/tools/check_pickle_imports.py
@@ -34,7 +34,6 @@ ALLOWED_FILES = set([
     'vllm/model_executor/models/registry.py',
     'tests/test_utils.py',
     'tests/tokenization/test_cached_tokenizer.py',
-    'tests/model_executor/test_guided_processors.py',
     'vllm/distributed/utils.py',
     'vllm/distributed/parallel_state.py',
     'vllm/engine/multiprocessing/client.py',
diff --git a/vllm/config.py b/vllm/config.py
index 3bcbbe60652b7..7ae615f477057 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1774,8 +1774,8 @@ class CacheConfig:
     - "builtin" is Python's built-in hash.\n
     - "sha256" is collision resistant but with certain overheads.
     This option uses Pickle for object serialization before hashing.\n
-    - "sha256_cbor_64bit" provides a reproducible, cross-language compatible 
-    hash. It serializes objects using canonical CBOR and hashes them with 
+    - "sha256_cbor_64bit" provides a reproducible, cross-language compatible
+    hash. It serializes objects using canonical CBOR and hashes them with
     SHA-256. The resulting hash consists of the lower 64 bits of the SHA-256
     digest."""
     cpu_offload_gb: float = 0
@@ -3721,12 +3721,7 @@ def get_served_model_name(model: str,
     return served_model_name
 
 
-GuidedDecodingBackendV0 = Literal["auto", "outlines", "lm-format-enforcer",
-                                  "xgrammar", "guidance"]
-
-GuidedDecodingBackendV1 = Literal["auto", "xgrammar", "guidance", "outlines"]
-GuidedDecodingBackend = Literal[GuidedDecodingBackendV0,
-                                GuidedDecodingBackendV1]
+GuidedDecodingBackend = Literal["auto", "xgrammar", "guidance", "outlines"]
 
 
 @config
@@ -3734,7 +3729,7 @@ GuidedDecodingBackend = Literal[GuidedDecodingBackendV0,
 class DecodingConfig:
     """Dataclass which contains the decoding strategy of the engine."""
 
-    backend: GuidedDecodingBackend = "auto" if envs.VLLM_USE_V1 else "xgrammar"
+    backend: GuidedDecodingBackend = "auto"
     """Which engine will be used for guided decoding (JSON schema / regex etc)
     by default. With "auto", we will make opinionated choices based on request
     contents and what the backend libraries currently support, so the behavior
@@ -3776,13 +3771,6 @@ class DecodingConfig:
         return hash_str
 
     def __post_init__(self):
-        if envs.VLLM_USE_V1:
-            valid_guided_backends = get_args(GuidedDecodingBackendV1)
-        else:
-            valid_guided_backends = get_args(GuidedDecodingBackendV0)
-        if self.backend not in valid_guided_backends:
-            raise ValueError(f"Invalid backend '{self.backend}',"
-                             f" must be one of {valid_guided_backends}")
         if (self.disable_any_whitespace
                 and self.backend not in ("xgrammar", "guidance")):
             raise ValueError("disable_any_whitespace is only supported for "
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d4d6001a428d2..6bdc3c361af34 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -25,14 +25,14 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
                          ConfigFormat, ConfigType, ConvertOption,
                          DecodingConfig, DetailedTraceModules, Device,
                          DeviceConfig, DistributedExecutorBackend,
-                         GuidedDecodingBackend, GuidedDecodingBackendV1,
-                         HfOverrides, KVEventsConfig, KVTransferConfig,
-                         LoadConfig, LogprobsMode, LoRAConfig, ModelConfig,
-                         ModelDType, ModelImpl, MultiModalConfig,
-                         ObservabilityConfig, ParallelConfig, PoolerConfig,
-                         PrefixCachingHashAlgo, RunnerOption, SchedulerConfig,
-                         SchedulerPolicy, SpeculativeConfig, TaskOption,
-                         TokenizerMode, VllmConfig, get_attr_docs, get_field)
+                         GuidedDecodingBackend, HfOverrides, KVEventsConfig,
+                         KVTransferConfig, LoadConfig, LogprobsMode,
+                         LoRAConfig, ModelConfig, ModelDType, ModelImpl,
+                         MultiModalConfig, ObservabilityConfig, ParallelConfig,
+                         PoolerConfig, PrefixCachingHashAlgo, RunnerOption,
+                         SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
+                         TaskOption, TokenizerMode, VllmConfig, get_attr_docs,
+                         get_field)
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
@@ -1343,14 +1343,6 @@ class EngineArgs:
                                recommend_to_remove=True)
             return False
 
-        if self.guided_decoding_backend not in get_args(
-                GuidedDecodingBackendV1):
-            _raise_or_fallback(
-                feature_name=
-                f"--guided-decoding-backend={self.guided_decoding_backend}",
-                recommend_to_remove=False)
-            return False
-
         # Need at least Ampere for now (FA support required).
         # Skip this check if we are running on a non-GPU platform,
         # or if the device capability is not available
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 39642d89167bd..06bb4eeab69eb 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
-import copy
 import time
 import weakref
 from functools import partial
@@ -24,8 +23,6 @@ from vllm.inputs import PromptType
 from vllm.inputs.preprocess import InputPreprocessor
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
-from vllm.model_executor.guided_decoding import (
-    get_guided_decoding_logits_processor)
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.outputs import PoolingRequestOutput, RequestOutput
 from vllm.pooling_params import PoolingParams
@@ -469,19 +466,6 @@ class _AsyncLLMEngine(LLMEngine):
             tokenization_kwargs=tokenization_kwargs,
         )
 
-        if isinstance(params, SamplingParams) and \
-            params.guided_decoding is not None:
-            # Guided decoding has an async implementation for building logits
-            # processors in a separate threadpool.
-            # We want to invoke that here instead of using the blocking
-            # implementation in the LLMEngine
-            params = await build_guided_decoding_logits_processor_async(
-                sampling_params=params,
-                tokenizer=await self.get_tokenizer_async(lora_request),
-                default_guided_backend=self.decoding_config.backend,
-                reasoning_backend=self.decoding_config.reasoning_backend,
-                model_config=self.model_config)
-
         self._add_processed_request(
             request_id=request_id,
             processed_inputs=processed_inputs,
@@ -503,48 +487,6 @@ class _AsyncLLMEngine(LLMEngine):
         raise NotImplementedError
 
 
-async def build_guided_decoding_logits_processor_async(
-        sampling_params: SamplingParams, tokenizer: AnyTokenizer,
-        default_guided_backend: str, reasoning_backend: Optional[str],
-        model_config: ModelConfig) -> SamplingParams:
-    """Constructs logits processors based on the guided_decoding,
-    logits_bias, and allowed_token_ids fields in sampling_params. Deletes
-    those fields and adds the constructed logits processors to the
-    logits_processors field. Modifies sampling params in-place and returns
-    the modified sampling params."""
-    if sampling_params.guided_decoding is None:
-        return sampling_params
-
-    # Defensively copy sampling params since guided decoding logits
-    # processors can have different state for each request
-    sampling_params = copy.copy(sampling_params)
-    guided_decoding = sampling_params.guided_decoding
-
-    logger.debug(
-        "Building guided decoding logits processor. "
-        "guided_decoding: %s%s", guided_decoding,
-        f", reasoning_backend: {reasoning_backend}"
-        if reasoning_backend is not None else "")
-
-    guided_decoding.backend = guided_decoding.backend or default_guided_backend
-
-    processor = await get_guided_decoding_logits_processor(
-        guided_params=guided_decoding,
-        tokenizer=tokenizer,
-        reasoning_backend=reasoning_backend,
-        model_config=model_config)
-
-    if processor:
-        if sampling_params.logits_processors is None:
-            sampling_params.logits_processors = []
-        sampling_params.logits_processors.append(processor)
-
-    # Unset guided decoding params after constructing the lp from them
-    sampling_params.guided_decoding = None
-
-    return sampling_params
-
-
 class AsyncLLMEngine(EngineClient):
     """An asynchronous wrapper for [`LLMEngine`][vllm.LLMEngine].
 
@@ -1028,7 +970,7 @@ class AsyncLLMEngine(EngineClient):
         ```
         # Please refer to entrypoints/api_server.py for
         # the complete example.
-    
+
         # initialize the engine and the example input
         # note that engine_args here is AsyncEngineArgs instance
         engine = AsyncLLMEngine.from_engine_args(engine_args)
@@ -1036,13 +978,13 @@ class AsyncLLMEngine(EngineClient):
             "input": "What is LLM?",
             "request_id": 0,
         }
-    
+
         # start the generation
         results_generator = engine.encode(
         example_input["input"],
         PoolingParams(),
         example_input["request_id"])
-    
+
         # get the results
         final_output = None
         async for request_output in results_generator:
@@ -1052,7 +994,7 @@ class AsyncLLMEngine(EngineClient):
                 # Return or raise an error
                 ...
             final_output = request_output
-    
+
         # Process and return the final output
         ...
         ```
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index e7919d90442f9..3f30a34170ffe 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import copy
 import time
 from collections import Counter as collectionsCounter
 from collections import deque
@@ -36,8 +35,6 @@ from vllm.inputs.preprocess import InputPreprocessor
 from vllm.logger import init_logger
 from vllm.logits_process import get_bad_words_logits_processors
 from vllm.lora.request import LoRARequest
-from vllm.model_executor.guided_decoding import (
-    get_local_guided_decoding_logits_processor)
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.multimodal.processing import EncDecMultiModalProcessor
@@ -686,11 +683,10 @@ class LLMEngine:
                              "Priority scheduling is not enabled.")
 
         if isinstance(params, SamplingParams) \
-            and (params.guided_decoding or params.logits_processors) \
+            and params.logits_processors \
             and self.scheduler_config.num_scheduler_steps > 1:
             raise ValueError(
-                "Guided decoding and logits processors are not supported "
-                "in multi-step decoding")
+                "Logits processors are not supported in multi-step decoding")
 
         if arrival_time is None:
             arrival_time = time.time()
@@ -1226,7 +1222,7 @@ class LLMEngine:
         engine = LLMEngine.from_engine_args(engine_args)
         example_inputs = [(0, "What is LLM?",
         SamplingParams(temperature=0.0))]
-    
+
         # Start the engine with an event loop
         while True:
             if example_inputs:
@@ -1983,43 +1979,13 @@ class LLMEngine:
     def _build_logits_processors(
             self, sampling_params: SamplingParams,
             lora_request: Optional[LoRARequest]) -> SamplingParams:
-        """Constructs logits processors based on the guided_decoding,
-        logits_bias, and allowed_token_ids fields in sampling_params. Deletes
-        those fields and adds the constructed logits processors to the
-        logits_processors field. Returns the modified sampling params."""
+        """Constructs logits processors based on the logits_bias, and
+        allowed_token_ids fields in sampling_params. Deletes those fields and
+        adds the constructed logits processors to the logits_processors field.
+        Returns the modified sampling params."""
 
         logits_processors = []
 
-        if sampling_params.guided_decoding is not None:
-            # Defensively copy sampling params since guided decoding logits
-            # processors can have different state for each request
-            sampling_params = copy.copy(sampling_params)
-            guided_decoding = sampling_params.guided_decoding
-
-            logger.debug(
-                "Building guided decoding logits processor in "
-                "LLMEngine. Params: %s", guided_decoding)
-
-            tokenizer = self.get_tokenizer(lora_request=lora_request)
-            guided_decoding.backend = guided_decoding.backend or \
-                self.decoding_config.backend
-
-            if self.decoding_config.reasoning_backend:
-                logger.debug("Building with reasoning backend %s",
-                             self.decoding_config.reasoning_backend)
-
-            processor = get_local_guided_decoding_logits_processor(
-                guided_params=guided_decoding,
-                tokenizer=tokenizer,
-                model_config=self.model_config,
-                reasoning_backend=self.decoding_config.reasoning_backend,
-            )
-            if processor:
-                logits_processors.append(processor)
-
-            # Unset so this doesn't get passed down to the model
-            sampling_params.guided_decoding = None
-
         if (sampling_params.logit_bias or sampling_params.allowed_token_ids):
             tokenizer = self.get_tokenizer(lora_request=lora_request)
 
diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py
index cde8fc367fb54..f69f72edf6a52 100644
--- a/vllm/engine/multiprocessing/client.py
+++ b/vllm/engine/multiprocessing/client.py
@@ -20,8 +20,6 @@ from vllm.config import DecodingConfig, ModelConfig, VllmConfig
 from vllm.core.scheduler import SchedulerOutputs
 # yapf conflicts with isort for this block
 # yapf: disable
-from vllm.engine.async_llm_engine import (
-    build_guided_decoding_logits_processor_async)
 from vllm.engine.multiprocessing import (ENGINE_DEAD_ERROR, IPC_DATA_EXT,
                                          IPC_HEALTH_EXT, IPC_INPUT_EXT,
                                          IPC_OUTPUT_EXT, RPC_REQUEST_T,
@@ -537,22 +535,6 @@ class MQLLMEngineClient(EngineClient):
         if request_id in self.output_queues:
             raise ValueError(f"Request {request_id} already exists")
 
-        # Constructing guided decoding logits processors is expensive, so we do
-        # it here to avoid contending with cpu resources and the GIL on the
-        # backend process.
-        if isinstance(params, SamplingParams) and \
-            params.guided_decoding is not None:
-            params = await \
-                build_guided_decoding_logits_processor_async(
-                    sampling_params=params,
-                    tokenizer=await self.get_tokenizer(lora_request),
-                    default_guided_backend=(self.decoding_config.backend
-                        if self.decoding_config
-                        else DecodingConfig.backend),
-                    model_config=self.model_config,
-                    reasoning_backend=self.decoding_config.reasoning_backend,
-                )
-
         # 1) Create output queue for this requests.
         queue: asyncio.Queue[Union[RequestOutput,
                                    BaseException]] = asyncio.Queue()
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 04dd193966421..adef350931f3d 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import itertools
-import warnings
 from collections.abc import Sequence
 from contextlib import contextmanager
 from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union,
@@ -40,15 +39,13 @@ from vllm.inputs import PromptType, SingletonPrompt, TextPrompt, TokensPrompt
 from vllm.inputs.parse import parse_and_batch_prompt
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
-from vllm.model_executor.guided_decoding.guided_fields import (
-    GuidedDecodingRequest, LLMGuidedOptions)
 from vllm.model_executor.layers.quantization import QuantizationMethods
 from vllm.outputs import (ClassificationRequestOutput, EmbeddingRequestOutput,
                           PoolingRequestOutput, RequestOutput,
                           ScoringRequestOutput)
 from vllm.pooling_params import PoolingParams
-from vllm.sampling_params import (BeamSearchParams, GuidedDecodingParams,
-                                  RequestOutputKind, SamplingParams)
+from vllm.sampling_params import (BeamSearchParams, RequestOutputKind,
+                                  SamplingParams)
 from vllm.tasks import PoolingTask
 from vllm.transformers_utils.tokenizer import (AnyTokenizer, MistralTokenizer,
                                                get_cached_tokenizer)
@@ -330,8 +327,6 @@ class LLM:
         *,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        guided_options_request: Optional[Union[LLMGuidedOptions,
-                                               GuidedDecodingRequest]] = None,
     ) -> list[RequestOutput]:
         ...
 
@@ -345,8 +340,6 @@ class LLM:
         prompt_token_ids: Optional[list[int]] = None,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        guided_options_request: Optional[Union[LLMGuidedOptions,
-                                               GuidedDecodingRequest]] = None,
     ) -> list[RequestOutput]:
         ...
 
@@ -360,8 +353,6 @@ class LLM:
         prompt_token_ids: Optional[list[list[int]]] = None,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        guided_options_request: Optional[Union[LLMGuidedOptions,
-                                               GuidedDecodingRequest]] = None,
     ) -> list[RequestOutput]:
         ...
 
@@ -376,8 +367,6 @@ class LLM:
         prompt_token_ids: list[int],
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        guided_options_request: Optional[Union[LLMGuidedOptions,
-                                               GuidedDecodingRequest]] = None,
     ) -> list[RequestOutput]:
         ...
 
@@ -392,8 +381,6 @@ class LLM:
         prompt_token_ids: list[list[int]],
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        guided_options_request: Optional[Union[LLMGuidedOptions,
-                                               GuidedDecodingRequest]] = None,
     ) -> list[RequestOutput]:
         ...
 
@@ -406,8 +393,6 @@ class LLM:
         prompt_token_ids: Union[list[int], list[list[int]]],
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        guided_options_request: Optional[Union[LLMGuidedOptions,
-                                               GuidedDecodingRequest]] = None,
     ) -> list[RequestOutput]:
         ...
 
@@ -425,8 +410,6 @@ class LLM:
         prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        guided_options_request: Optional[Union[LLMGuidedOptions,
-                                               GuidedDecodingRequest]] = None,
         priority: Optional[list[int]] = None,
     ) -> list[RequestOutput]:
         """Generates the completions for the input prompts.
@@ -478,14 +461,6 @@ class LLM:
             parsed_prompts = cast(Union[PromptType, Sequence[PromptType]],
                                   prompts)
 
-        if isinstance(guided_options_request, dict):
-            if len(guided_options_request) > 1:
-                raise ValueError(
-                    "You can only use one guided decoding but multiple is "
-                    f"specified: {guided_options_request}")
-            guided_options_request = GuidedDecodingRequest(
-                **guided_options_request)
-
         if sampling_params is None:
             # Use default sampling params.
             sampling_params = self.get_default_sampling_params()
@@ -507,7 +482,6 @@ class LLM:
             params=sampling_params,
             use_tqdm=use_tqdm,
             lora_request=lora_request,
-            guided_options=guided_options_request,
             tokenization_kwargs=tokenization_kwargs,
             priority=priority,
         )
@@ -1361,17 +1335,17 @@ class LLM:
         of your inputs into a single list and pass it to this method.
 
         Supports both text and multi-modal data (images, etc.) when used with
-        appropriate multi-modal models. For multi-modal inputs, ensure the 
+        appropriate multi-modal models. For multi-modal inputs, ensure the
         prompt structure matches the model's expected input format.
 
         Args:
-            data_1: Can be a single prompt, a list of prompts or 
-                `ScoreMultiModalParam`, which can contain either text or 
-                multi-modal data. When a list, it must have the same length as 
+            data_1: Can be a single prompt, a list of prompts or
+                `ScoreMultiModalParam`, which can contain either text or
+                multi-modal data. When a list, it must have the same length as
                 the `data_2` list.
-            data_2: The data to pair with the query to form the input to 
+            data_2: The data to pair with the query to form the input to
                 the LLM. Can be text or multi-modal data. See [PromptType]
-                [vllm.inputs.PromptType] for more details about the format of 
+                [vllm.inputs.PromptType] for more details about the format of
                 each prompt.
             use_tqdm: If `True`, shows a tqdm progress bar.
                 If a callable (e.g., `functools.partial(tqdm, leave=False)`),
@@ -1582,17 +1556,8 @@ class LLM:
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[Sequence[LoRARequest], LoRARequest]],
         tokenization_kwargs: Optional[dict[str, Any]] = None,
-        guided_options: Optional[GuidedDecodingRequest] = None,
         priority: Optional[list[int]] = None,
     ) -> None:
-        if guided_options is not None:
-            warnings.warn(
-                "guided_options_request is deprecated, use "
-                "SamplingParams.guided_decoding instead",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-
         if isinstance(prompts, (str, dict)):
             # Convert a single prompt to a list.
             prompts = [prompts]
@@ -1608,8 +1573,6 @@ class LLM:
 
         for sp in params if isinstance(params, Sequence) else (params, ):
             if isinstance(sp, SamplingParams):
-                self._add_guided_params(sp, guided_options)
-
                 # We only care about the final output
                 sp.output_kind = RequestOutputKind.FINAL_ONLY
 
@@ -1647,29 +1610,6 @@ class LLM:
             priority=priority,
         )
 
-    def _add_guided_params(
-            self,
-            params: SamplingParams,
-            guided_options: Optional[GuidedDecodingRequest] = None):
-        if guided_options is None:
-            return params
-
-        if params.guided_decoding is not None:
-            raise ValueError("Cannot set both guided_options_request and "
-                             "params.guided_decoding.")
-
-        params.guided_decoding = GuidedDecodingParams(
-            json=guided_options.guided_json,
-            regex=guided_options.guided_regex,
-            choice=guided_options.guided_choice,
-            grammar=guided_options.guided_grammar,
-            json_object=guided_options.guided_json_object,
-            backend=guided_options.guided_decoding_backend,
-            whitespace_pattern=guided_options.guided_whitespace_pattern,
-            structural_tag=guided_options.structural_tag,
-        )
-        return params
-
     def _run_engine(
         self,
         *,
diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py
deleted file mode 100644
index 7540e6344a498..0000000000000
--- a/vllm/model_executor/guided_decoding/__init__.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from vllm.logger import init_logger
-from vllm.model_executor.guided_decoding.utils import (
-    convert_lark_to_gbnf, grammar_is_likely_lark,
-    has_lmf_unsupported_json_features, has_xgrammar_unsupported_json_features)
-from vllm.reasoning import ReasoningParserManager
-
-if TYPE_CHECKING:
-    from transformers import PreTrainedTokenizer
-
-    from vllm.config import ModelConfig
-    from vllm.logits_process import LogitsProcessor
-    from vllm.sampling_params import GuidedDecodingParams
-
-logger = init_logger(__name__)
-
-
-def maybe_backend_fallback(
-        guided_params: GuidedDecodingParams) -> GuidedDecodingParams:
-
-    def fallback_or_error(guided_params: GuidedDecodingParams, message: str,
-                          fallback: str) -> None:
-        """Change the backend to the specified fallback with a warning log,
-        or raise a ValueError if the `disable_fallback` option is specified."""
-        if guided_params.disable_fallback:
-            raise ValueError(message)
-
-        logger.warning("%s Falling back to use %s instead.", message, fallback)
-        guided_params.backend = fallback
-
-    # `auto` was added for V1 to explicitly declare a mode that has fallbacks
-    # in place. If that is specified with V0, treat it as `xgrammar`, as we have
-    # fallbacks enabled for that and it is the V0 default.
-    if guided_params.backend == "auto":
-        guided_params.backend = "xgrammar"
-
-    # lm-format-enforce doesn't support grammar, fallback to xgrammar
-    if guided_params.backend == "lm-format-enforcer":
-        if guided_params.grammar is not None:
-            fallback_or_error(
-                guided_params,
-                "lm-format-enforcer does not support grammar guided decoding.",
-                "xgrammar")
-
-        # lm-format-enforcer doesn't support some JSON schema features
-        elif (guided_params.json is not None
-              and has_lmf_unsupported_json_features(guided_params.json)):
-            fallback_or_error(
-                guided_params,
-                "lm-format-enforcer does not support advanced JSON schema "
-                "features like patterns or numeric ranges.", "outlines")
-
-    if guided_params.backend == "xgrammar":
-        from vllm.model_executor.guided_decoding.xgrammar_decoding import (
-            xgr_installed)
-
-        # xgrammar doesn't support some JSON schema features
-        if (guided_params.json is not None and
-                has_xgrammar_unsupported_json_features(guided_params.json)):
-            fallback_or_error(
-                guided_params,
-                "xgrammar does not support advanced JSON schema features like "
-                "string length, item limits, or property bounds.", "outlines")
-
-        # xgrammar only supports GBNF grammars, so we must convert Lark.
-        # We must check if the grammar is likely Lark and if that
-        # grammar is convertible to GBNF
-        elif (guided_params.grammar is not None
-              and grammar_is_likely_lark(guided_params.grammar)):
-            try:
-                convert_lark_to_gbnf(guided_params.grammar)
-            except Exception:
-                fallback_or_error(
-                    guided_params,
-                    "xgrammar does not support Lark grammars and the "
-                    "grammar failed to convert to GBNF.", "guidance")
-
-        # If the xgrammar module cannot be imported successfully,
-        # we should still allow users to use guided decoding with a fallback.
-        elif not xgr_installed:
-            fallback_or_error(
-                guided_params,
-                "xgrammar module cannot be imported successfully.", "guidance")
-
-    if guided_params.backend == "outlines":
-        if guided_params.json_object is not None:
-            # outlines doesn't support json_object, fallback to guidance
-            fallback_or_error(guided_params,
-                              "outlines does not support json_object.",
-                              "guidance")
-        elif guided_params.grammar is not None:
-            # outlines grammar support has been removed, fallback to guidance
-            # if it is a lark-based grammar and xgrammar otherwise
-            if grammar_is_likely_lark(guided_params.grammar):
-                fallback_or_error(guided_params,
-                                  "outlines no longer supports grammars.",
-                                  "guidance")
-            else:
-                # The grammar is likely already GBNF format.
-                fallback_or_error(guided_params,
-                                  "outlines no longer supports grammars.",
-                                  "xgrammar")
-
-    return guided_params
-
-
-async def get_guided_decoding_logits_processor(
-        guided_params: GuidedDecodingParams,
-        tokenizer: PreTrainedTokenizer,
-        model_config: ModelConfig,
-        reasoning_backend: str | None = None) -> LogitsProcessor | None:
-
-    reasoner = None
-    if reasoning_backend:
-        reasoner_class = ReasoningParserManager.get_reasoning_parser(
-            reasoning_backend)
-        reasoner = reasoner_class(tokenizer)
-
-    guided_params = maybe_backend_fallback(guided_params)
-
-    if guided_params.backend == 'outlines':
-        # NOTE: lazy import outlines to avoid https://github.com/vllm-project/vllm/issues/4193
-        from vllm.model_executor.guided_decoding.outlines_decoding import (  # noqa
-            get_outlines_guided_decoding_logits_processor)
-        return await get_outlines_guided_decoding_logits_processor(
-            guided_params, tokenizer, reasoner)
-    if guided_params.backend == 'lm-format-enforcer':
-        from vllm.model_executor.guided_decoding.lm_format_enforcer_decoding import (  # noqa
-            get_local_lm_format_enforcer_guided_decoding_logits_processor)
-        return get_local_lm_format_enforcer_guided_decoding_logits_processor(
-            guided_params, tokenizer)
-    if guided_params.backend == 'xgrammar':
-        from vllm.model_executor.guided_decoding.xgrammar_decoding import (  # noqa
-            get_local_xgrammar_guided_decoding_logits_processor)
-        return get_local_xgrammar_guided_decoding_logits_processor(
-            guided_params, tokenizer, model_config, reasoner)
-    if guided_params.backend == 'guidance':
-        from vllm.model_executor.guided_decoding.guidance_decoding import (
-            get_local_guidance_guided_decoding_logits_processor)
-        return get_local_guidance_guided_decoding_logits_processor(
-            guided_params, tokenizer)
-    raise ValueError(
-        f"Unknown guided decoding backend '{guided_params.backend}'. "
-        "Must be one of 'outlines, 'lm-format-enforcer', 'xgrammar', 'guidance'"
-    )
-
-
-def get_local_guided_decoding_logits_processor(
-        guided_params: GuidedDecodingParams,
-        tokenizer: PreTrainedTokenizer,
-        model_config: ModelConfig,
-        reasoning_backend: str | None = None) -> LogitsProcessor | None:
-    guided_params = maybe_backend_fallback(guided_params)
-
-    reasoner = None
-    if reasoning_backend:
-        reasoner_class = ReasoningParserManager.get_reasoning_parser(
-            reasoning_backend)
-        reasoner = reasoner_class(tokenizer)
-
-    if guided_params.backend == 'outlines':
-        # NOTE: lazy import outlines to avoid https://github.com/vllm-project/vllm/issues/4193
-        from vllm.model_executor.guided_decoding.outlines_decoding import (  # noqa
-            get_local_outlines_guided_decoding_logits_processor)
-        return get_local_outlines_guided_decoding_logits_processor(
-            guided_params, tokenizer, reasoner)
-    if guided_params.backend == 'lm-format-enforcer':
-        from vllm.model_executor.guided_decoding.lm_format_enforcer_decoding import (  # noqa
-            get_local_lm_format_enforcer_guided_decoding_logits_processor)
-        return get_local_lm_format_enforcer_guided_decoding_logits_processor(
-            guided_params, tokenizer)
-    if guided_params.backend == 'xgrammar':
-        from vllm.model_executor.guided_decoding.xgrammar_decoding import (  # noqa
-            get_local_xgrammar_guided_decoding_logits_processor)
-        return get_local_xgrammar_guided_decoding_logits_processor(
-            guided_params, tokenizer, model_config, reasoner)
-    if guided_params.backend == 'guidance':
-        from vllm.model_executor.guided_decoding.guidance_decoding import (
-            get_local_guidance_guided_decoding_logits_processor)
-        return get_local_guidance_guided_decoding_logits_processor(
-            guided_params, tokenizer)
-
-    raise ValueError(
-        f"Unknown guided decoding backend '{guided_params.backend}'. "
-        "Must be one of 'outlines, 'lm-format-enforcer', 'xgrammar', 'guidance'"
-    )
diff --git a/vllm/model_executor/guided_decoding/guidance_decoding.py b/vllm/model_executor/guided_decoding/guidance_decoding.py
deleted file mode 100644
index 05b6a1c3239f1..0000000000000
--- a/vllm/model_executor/guided_decoding/guidance_decoding.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import json
-
-import llguidance
-from regex import escape as regex_escape
-from transformers import PreTrainedTokenizerBase
-
-from vllm.model_executor.guided_decoding.guidance_logits_processors import (
-    GuidanceLogitsProcessor)
-from vllm.sampling_params import GuidedDecodingParams
-from vllm.v1.structured_output.backend_guidance import (
-    process_for_additional_properties)
-
-
-def get_local_guidance_guided_decoding_logits_processor(
-        guided_params: GuidedDecodingParams,
-        tokenizer: PreTrainedTokenizerBase) -> GuidanceLogitsProcessor:
-    """
-    Given an OpenAI-compatible request, check for guided decoding parameters
-    and get the necessary logits processor for the given guide.
-    """
-
-    grm = ""
-    any_whitespace = not guided_params.disable_any_whitespace
-    if (guide_json := guided_params.json) is not None:
-        # Optionally set additionalProperties to False at the top-level
-        # By default, other backends do not allow additional top-level
-        # properties, so this makes guidance more similar to other backends
-        if guided_params.disable_additional_properties:
-            if not isinstance(guide_json, str):
-                guide_json = json.dumps(guide_json)
-            guide_json = process_for_additional_properties(guide_json)
-
-        grm = llguidance.LLMatcher.grammar_from_json_schema(
-            guide_json,
-            overrides={"whitespace_pattern": guided_params.whitespace_pattern},
-            defaults={
-                "whitespace_flexible": any_whitespace,
-            })
-    elif guided_params.json_object:
-        grm = llguidance.LLMatcher.grammar_from_json_schema(
-            '{"type": "object"}',
-            overrides={"whitespace_pattern": guided_params.whitespace_pattern},
-            defaults={
-                "whitespace_flexible": any_whitespace,
-            })
-    elif guided_params.regex:
-        grm = llguidance.grammar_from("regex", guided_params.regex)
-    elif guided_params.choice:
-        # choice just uses regex
-        choices = (regex_escape(str(choice))
-                   for choice in guided_params.choice)
-        choices_regex = "(" + "|".join(choices) + ")"
-        grm = llguidance.grammar_from("regex", choices_regex)
-    elif guided_params.grammar:
-        # this supports Lark and GBNF
-        grm = llguidance.grammar_from("grammar", guided_params.grammar)
-
-    if grm:
-        return GuidanceLogitsProcessor(grm, tokenizer)
-
-    raise ValueError("Unknown guided decoding mode")
diff --git a/vllm/model_executor/guided_decoding/guidance_logits_processors.py b/vllm/model_executor/guided_decoding/guidance_logits_processors.py
deleted file mode 100644
index 379b5eaa38a76..0000000000000
--- a/vllm/model_executor/guided_decoding/guidance_logits_processors.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import copy
-import os
-from typing import Any
-
-import llguidance
-import llguidance.hf
-import llguidance.torch
-import torch
-from transformers import PreTrainedTokenizerBase
-
-from vllm.logger import init_logger
-
-logger = init_logger(__name__)
-
-
-class GuidanceLogitsProcessor:
-    """Base Guidance Logits Processor"""
-
-    cached_tokenizers: dict[str, Any] = {}
-
-    def __init__(
-        self,
-        grammar: str,
-        tokenizer: PreTrainedTokenizerBase,
-    ) -> None:
-        """Base Guidance Logits Processor
-
-        Args:
-            grammar (str)
-                grammar to guide the generation
-            tokenizer (PreTrainedTokenizerBase)
-                model's tokenizer
-        """
-        self.grammar = grammar
-        self.tokenizer = tokenizer
-        self.tokenizer_name = tokenizer.name_or_path
-        self.ll_tokenizer = None
-        self.ll_matcher = None
-        self.bitmask = None
-        self.new_sampling = False
-        self.initialized = False
-
-    def clone(self) -> "GuidanceLogitsProcessor":
-        cloned = copy.copy(self)
-        if self.initialized:
-            cloned.ll_matcher = llguidance.LLMatcher(
-                self.ll_tokenizer,  # type: ignore[assignment]
-                self.grammar,
-                log_level=int(os.environ.get("LLGUIDANCE_LOG_LEVEL", "1")),
-            )
-            self.bitmask = llguidance.torch.allocate_token_bitmask(
-                1, self.ll_tokenizer.vocab_size)  # type: ignore[attr-defined]
-        return cloned
-
-    def _initialize(self):
-        if self.initialized:
-            return
-
-        ll_tokenizer = self.cached_tokenizers.get(self.tokenizer.name_or_path,
-                                                  None)
-        if ll_tokenizer is None:
-            ll_tokenizer = llguidance.hf.from_tokenizer(self.tokenizer, None)
-            self.cached_tokenizers[self.tokenizer.name_or_path] = ll_tokenizer
-
-        self.ll_tokenizer = ll_tokenizer
-        self.ll_matcher = llguidance.LLMatcher(
-            self.ll_tokenizer,
-            self.grammar,
-            log_level=int(os.environ.get("LLGUIDANCE_LOG_LEVEL", "1")),
-        )
-
-        # create reusable bitmask
-        self.bitmask = llguidance.torch.allocate_token_bitmask(
-            1, self.ll_tokenizer.vocab_size)  # type: ignore[attr-defined]
-
-        self.initialized = True
-
-    def __call__(
-        self,
-        input_ids: list[int],
-        scores: torch.Tensor,
-    ) -> torch.Tensor:
-        # we initialize the guidance model here
-        # to avoid pickling ll_tokenizer and ll_interpreter
-        self._initialize()
-
-        if self.new_sampling and len(input_ids) > 0:
-            self.ll_matcher.consume_token(  # type: ignore[attr-defined]
-                input_ids[-1])
-            err = self.ll_matcher.get_error()  # type: ignore[attr-defined]
-            if err:
-                logger.warning("Error in LLMatcher: %s", err)
-
-        llguidance.torch.fill_next_token_bitmask(self.ll_matcher, self.bitmask,
-                                                 0)
-        llguidance.torch.apply_token_bitmask_inplace(
-            scores,
-            self.bitmask.to(scores.device))  # type: ignore[attr-defined]
-
-        self.new_sampling = True
-
-        return scores
diff --git a/vllm/model_executor/guided_decoding/guided_fields.py b/vllm/model_executor/guided_decoding/guided_fields.py
deleted file mode 100644
index fa97b6dbf5115..0000000000000
--- a/vllm/model_executor/guided_decoding/guided_fields.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from dataclasses import dataclass
-from typing import Optional, TypedDict, Union
-
-
-# These classes are deprecated, see SamplingParams
-class LLMGuidedOptions(TypedDict, total=False):
-    guided_json: Union[dict, str]
-    guided_regex: str
-    guided_choice: list[str]
-    guided_grammar: str
-    guided_decoding_backend: str
-    guided_whitespace_pattern: str
-    guided_json_object: bool
-
-
-@dataclass
-class GuidedDecodingRequest:
-    """One of the fields will be used to retrieve the logit processor."""
-    guided_json: Optional[Union[dict, str]] = None
-    guided_regex: Optional[str] = None
-    guided_choice: Optional[list[str]] = None
-    guided_grammar: Optional[str] = None
-    guided_decoding_backend: Optional[str] = None
-    guided_whitespace_pattern: Optional[str] = None
-    guided_json_object: Optional[bool] = None
-    structural_tag: Optional[str] = None
-
-    def __post_init__(self):
-        """Validate that some fields are mutually exclusive."""
-        guide_count = sum(x is not None
-                          for x in (self.guided_json, self.guided_regex,
-                                    self.guided_choice, self.guided_grammar,
-                                    self.guided_json_object,
-                                    self.structural_tag))
-        if guide_count > 1:
-            raise ValueError(
-                "You can only use one kind of guided decoding but multiple are "
-                f"specified: {self.__dict__}")
diff --git a/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py b/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py
deleted file mode 100644
index f9b51f4c15745..0000000000000
--- a/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from functools import lru_cache
-from json import loads as json_loads
-from typing import Optional, Union
-
-from lmformatenforcer import (CharacterLevelParser, JsonSchemaParser,
-                              RegexParser, StringParser,
-                              TokenEnforcerTokenizerData, UnionParser)
-from lmformatenforcer.integrations.vllm import (
-    build_vllm_logits_processor, build_vllm_token_enforcer_tokenizer_data)
-from transformers import PreTrainedTokenizerBase
-
-from vllm.logits_process import LogitsProcessor
-from vllm.sampling_params import GuidedDecodingParams
-
-
-def get_local_lm_format_enforcer_guided_decoding_logits_processor(
-        guided_params: GuidedDecodingParams,
-        tokenizer) -> Optional[LogitsProcessor]:
-    """
-    Given an OpenAI-compatible request, check for guided decoding parameters
-    and get the necessary logits processor for the given guide.
-    We cache logit processors by (guide, tokenizer), and on cache hit
-    we make a shallow copy to reuse the same underlying FSM.
-    """
-
-    tokenizer_data = _cached_build_vllm_token_enforcer_tokenizer_data(
-        tokenizer)
-    character_level_parser: CharacterLevelParser
-    if guided_params.json:
-        schema_dict = _normalize_json_schema_object(guided_params.json)
-        character_level_parser = JsonSchemaParser(schema_dict)
-    elif guided_params.choice:
-        character_level_parser = UnionParser(
-            [StringParser(choice) for choice in guided_params.choice])
-    elif guided_params.regex:
-        character_level_parser = RegexParser(guided_params.regex)
-    elif guided_params.grammar:
-        # CFG grammar not supported by LMFE
-        raise ValueError("Cannot construct a guided decoding logits processor"
-                         " using the grammar option with the"
-                         " lm_format_enforcer backend.")
-    elif guided_params.json_object:
-        # None means any json object
-        character_level_parser = JsonSchemaParser(None)
-    else:
-        return None
-
-    logits_processor = build_vllm_logits_processor(tokenizer_data,
-                                                   character_level_parser)
-    return logits_processor
-
-
-def _normalize_json_schema_object(schema: Union[str, dict]) -> dict:
-    if isinstance(schema, str):
-        return json_loads(schema)
-    if isinstance(schema, dict):
-        return schema
-    raise AssertionError(f"Unsupported schema type {schema}")
-
-
-@lru_cache
-def _cached_build_vllm_token_enforcer_tokenizer_data(
-        tokenizer: PreTrainedTokenizerBase) -> TokenEnforcerTokenizerData:
-    return build_vllm_token_enforcer_tokenizer_data(tokenizer)
diff --git a/vllm/model_executor/guided_decoding/outlines_decoding.py b/vllm/model_executor/guided_decoding/outlines_decoding.py
deleted file mode 100644
index 7e365b294438b..0000000000000
--- a/vllm/model_executor/guided_decoding/outlines_decoding.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import asyncio
-import concurrent.futures
-import os
-from enum import Enum
-from json import dumps as json_dumps
-from typing import Optional, Union
-
-from regex import escape as regex_escape
-from transformers import PreTrainedTokenizerBase
-
-from vllm.model_executor.guided_decoding.outlines_logits_processors import (
-    JSONLogitsProcessor, RegexLogitsProcessor)
-from vllm.reasoning import ReasoningParser
-from vllm.sampling_params import GuidedDecodingParams
-
-
-class GuidedDecodingMode(Enum):
-    JSON = "json"
-    REGEX = "regex"
-    CHOICE = "choice"
-
-
-global_thread_pool = None  # used for generating logits processor fsm
-
-# It's not yet clear that using more provides a benefit, and it could
-# potentially starve other processes on the machine. We'll cap this for now and
-# adjust later if testing proves it to help overcome a bottleneck.
-_MAX_THREADPOOL_WORKERS = 16
-
-
-async def get_outlines_guided_decoding_logits_processor(
-    guided_params: GuidedDecodingParams, tokenizer: PreTrainedTokenizerBase,
-    reasoner: Optional[ReasoningParser]
-) -> Union[JSONLogitsProcessor, RegexLogitsProcessor, None]:
-    """
-    Given an OpenAI-compatible request, check for guided decoding parameters
-    and get the necessary logits processor for the given guide.
-    """
-    global global_thread_pool
-    guide, mode = _get_guide_and_mode(guided_params)
-    if not guide or not mode:
-        return None
-
-    if global_thread_pool is None:
-        max_workers = os.cpu_count() or 2
-        if max_workers > _MAX_THREADPOOL_WORKERS:
-            max_workers = _MAX_THREADPOOL_WORKERS
-        global_thread_pool = concurrent.futures.ThreadPoolExecutor(
-            max_workers=max_workers)
-    loop = asyncio.get_running_loop()
-    return await loop.run_in_executor(global_thread_pool,
-                                      _get_logits_processor, guide, tokenizer,
-                                      mode, guided_params.whitespace_pattern,
-                                      reasoner)
-
-
-def get_local_outlines_guided_decoding_logits_processor(
-    guided_params: GuidedDecodingParams, tokenizer: PreTrainedTokenizerBase,
-    reasoner: Optional[ReasoningParser]
-) -> Union[JSONLogitsProcessor, RegexLogitsProcessor, None]:
-    """
-    Given an OpenAI-compatible request, check for guided decoding parameters
-    and get the necessary logits processor for the given guide.
-    """
-    guide, mode = _get_guide_and_mode(guided_params)
-    if not guide or not mode:
-        return None
-
-    return _get_logits_processor(guide, tokenizer, mode,
-                                 guided_params.whitespace_pattern, reasoner)
-
-
-def _get_guide_and_mode(
-    guided_params: GuidedDecodingParams
-) -> Union[tuple[str, GuidedDecodingMode], tuple[None, None]]:
-    if guided_params.json:
-        if isinstance(guided_params.json, dict):
-            # turn dict into hashable string
-            json = json_dumps(guided_params.json)
-        else:
-            json = guided_params.json
-        return json, GuidedDecodingMode.JSON
-    elif guided_params.regex:
-        return guided_params.regex, GuidedDecodingMode.REGEX
-    elif guided_params.choice:
-        # choice just uses regex
-        choices = [
-            regex_escape(str(choice)) for choice in guided_params.choice
-        ]
-        choices_regex = "(" + "|".join(choices) + ")"
-        return choices_regex, GuidedDecodingMode.CHOICE
-    elif guided_params.grammar:
-        raise ValueError(
-            "The `outlines` guided decoding backend no longer supports grammar "
-            "guided generation. Please use either the `xgrammar` or `guidance` "
-            "backend")
-    else:
-        return None, None
-
-
-def _get_logits_processor(
-    guide: str,
-    tokenizer: PreTrainedTokenizerBase,
-    mode: GuidedDecodingMode,
-    whitespace_pattern: Union[str, None],
-    reasoner: Optional[ReasoningParser],
-) -> Union[JSONLogitsProcessor, RegexLogitsProcessor]:
-    if mode == GuidedDecodingMode.JSON:
-        return JSONLogitsProcessor(guide, tokenizer, whitespace_pattern,
-                                   reasoner)
-    elif mode == GuidedDecodingMode.REGEX or mode == GuidedDecodingMode.CHOICE:
-        return RegexLogitsProcessor(guide, tokenizer, reasoner)
-    else:
-        raise ValueError(f"Unknown guided decoding mode {mode}")
diff --git a/vllm/model_executor/guided_decoding/outlines_logits_processors.py b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
deleted file mode 100644
index 7f047a1df6a58..0000000000000
--- a/vllm/model_executor/guided_decoding/outlines_logits_processors.py
+++ /dev/null
@@ -1,307 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# SPDX-FileCopyrightText: Copyright 2024-present the Outlines developers
-from __future__ import annotations
-
-import copy
-import hashlib
-import importlib.metadata
-import json
-import os
-from typing import Optional, Union
-
-import regex as re
-import torch
-from cachetools import LRUCache
-from diskcache import Cache
-from outlines_core import Guide, Index, Vocabulary
-from outlines_core.json_schema import build_regex_from_schema
-from outlines_core.kernels.torch import (_apply_token_bitmask_inplace_kernel,
-                                         allocate_token_bitmask)
-from pydantic import BaseModel
-from transformers import PreTrainedTokenizerBase
-from transformers.file_utils import SPIECE_UNDERLINE
-from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
-
-import vllm.envs as envs
-from vllm.logger import init_logger
-from vllm.reasoning import ReasoningParser
-from vllm.transformers_utils.tokenizer import AnyTokenizer
-
-logger = init_logger(__name__)
-
-CACHE = None
-
-
-class BaseLogitsProcessor:
-
-    def __init__(self, guide: Guide, eos_token_id: int,
-                 reasoner: Optional[ReasoningParser]) -> None:
-        self._guide: Guide = guide
-        self._eos_token_id: int = eos_token_id
-        self._reasoner: Optional[ReasoningParser] = reasoner
-        self._mask: Optional[torch.Tensor] = None
-
-    def __call__(self, input_ids: list[int],
-                 scores: torch.Tensor) -> torch.Tensor:
-        if self._mask is None:
-            self._mask = allocate_token_bitmask(scores.size(-1))
-
-        # Skip the structured logits processing if reasoning is not finished.
-        # reasoner is not None only when `--reasoning-parser` is set.
-        if self._reasoner is not None and not self._reasoner.is_reasoning_end(
-                input_ids):
-            return scores
-
-        # Remove the reasoning tokens from the input_ids
-        # We need this because our implementation relies on the
-        # input_ids sequence to store the FSM state.
-        input_ids = (self._reasoner.extract_content_ids(input_ids)
-                     if self._reasoner is not None else input_ids)
-
-        # Vllm V0 engine has a weird bug where we have to repeat
-        # the eos token id twice for generation to stop, or at least
-        # that is what we have to do from here in any case.
-        # This is a patch until a better solution can be pushed
-        # to outlines_core
-        if input_ids and input_ids[-1] != self._eos_token_id:
-            self._guide.advance(token_id=input_ids[-1], return_tokens=False)
-
-        self._guide.write_mask_into(
-            data_ptr=self._mask.data_ptr(),
-            numel=self._mask.numel(),
-            element_size=self._mask.element_size(),
-        )
-
-        # Any allowed tokens beyond the length of the scores will
-        # be ignored by the kernel, taking care of the issue with
-        # models such as Llama 3.2 Vision with an `<|image|>` token
-        # with id 128256, but scores.shape == torch.Size([128256])
-        _apply_token_bitmask_inplace_kernel(
-            logits=scores.unsqueeze(dim=0),
-            # mask must be on same device
-            mask=self._mask.to(scores.device, non_blocking=True))
-        self._mask.to("cpu", non_blocking=True)
-
-        return scores
-
-    def clone(self) -> BaseLogitsProcessor:
-        guide = copy.deepcopy(self._guide)
-        guide.reset()
-        return BaseLogitsProcessor(guide=guide,
-                                   eos_token_id=self._eos_token_id,
-                                   reasoner=self._reasoner)
-
-
-class RegexLogitsProcessor(BaseLogitsProcessor):
-
-    @classmethod
-    def _get_guide(cls, regex_string: str,
-                   tokenizer: PreTrainedTokenizerBase) -> Guide:
-        global CACHE
-        if CACHE is None:
-            CACHE = get_cache()
-        vocabulary = get_vocabulary(tokenizer)  # type: ignore[arg-type]
-        cache_key = f"{vocabulary._hash}_{regex_string}"
-        if CACHE is not None and cache_key in CACHE:
-            return Guide(CACHE[cache_key])
-
-        index = Index(regex_string, vocabulary.inner)
-
-        if CACHE is not None:
-            CACHE[cache_key] = index
-
-        return Guide(index)
-
-    def __init__(self, regex_string: str, tokenizer: PreTrainedTokenizerBase,
-                 reasoner: Optional[ReasoningParser]) -> None:
-        super().__init__(
-            guide=RegexLogitsProcessor._get_guide(regex_string, tokenizer),
-            eos_token_id=tokenizer.eos_token_id,  # type: ignore
-            reasoner=reasoner)
-
-
-class JSONLogitsProcessor(RegexLogitsProcessor):
-
-    def __init__(self, schema: Union[str, dict, BaseModel],
-                 tokenizer: PreTrainedTokenizerBase,
-                 whitespace_pattern: Union[str, None],
-                 reasoner: Optional[ReasoningParser]) -> None:
-
-        if isinstance(schema, type(BaseModel)):
-            schema_str = json.dumps(schema.model_json_schema())
-        elif isinstance(schema, dict):
-            schema_str = json.dumps(schema)
-        elif isinstance(schema, str):
-            schema_str = schema
-        else:
-            raise ValueError(
-                f"Cannot parse schema {schema}. The schema must be either "
-                f"a Pydantic object, a dictionary or a string that contains "
-                f"the JSON Schema specification")
-
-        regex_string = build_regex_from_schema(schema_str, whitespace_pattern)
-        super().__init__(regex_string, tokenizer, reasoner)
-
-
-class OutlinesVocabulary:
-    """
-    Wrapper class for `outlines_core.Vocabulary`,
-    which allows us to store a hash with the vocabulary
-    """
-
-    def __init__(self, vocabulary: Vocabulary) -> None:
-        # Actual vocabulary object
-        self.inner = vocabulary
-        # Have to do abs(hash()) because python hashes can
-        # be negative, and we are using hash as a cache key.
-        hex_str = hashlib.sha256(
-            vocabulary.__repr__().encode('utf-8')).hexdigest()
-        hash_int = int(hex_str, 16)
-        self._hash = hash_int
-
-
-re_llama_byte_token = re.compile(r"^<0x[0-9A-F]{2}>$")
-re_replacement_seq = re.compile(r"^.{0,6}�+.{0,6}$")
-
-
-def _reduced_vocabulary(tokenizer: AnyTokenizer,
-                        eos_token_id: int) -> dict[bytes, list[int]]:
-    """Create a map from vocabulary tokens to lists of equivalent token ids.
-    
-    Returns:
-        A Dict of token string -> equivalent token ids
-    """
-    unicode_to_bytes = {v: k for k, v in bytes_to_unicode().items()}
-
-    def convert_token_to_string(token: str) -> str:
-
-        string = tokenizer.convert_tokens_to_string([token])
-
-        # A hack to handle missing spaces to HF's Llama tokenizers
-        if (type(token) is str and token.startswith(SPIECE_UNDERLINE)
-                or token == "<0x20>"):
-            return " " + string
-
-        return string
-
-    vocabulary: dict[bytes, list[int]] = {}
-    empty_token_ids: list[int] = []
-    for token, token_idx in tokenizer.get_vocab().items():
-        if token in tokenizer.all_special_tokens:  # type: ignore
-            continue
-
-        token_str = convert_token_to_string(token)
-        if token_str:
-            if isinstance(token, (bytes, bytearray)):
-                # For BPE tokenizers where tokens are stored as bytes.
-
-                # safe to ignore since token_str is of type (bytearray, bytes)
-                # by this point.
-                token_bytes = bytes(token_str)  # type: ignore[arg-type]
-
-            elif "\ufffd" in token_str and not re_replacement_seq.match(
-                    token_str):
-                # Handle tokens with invalid UTF-8 sequences.
-                if re_llama_byte_token.match(token):
-                    # Llama-like tokenizers use <0xXX> for incomplete sequences.
-                    token_bytes = bytes([int(token[3:5], 16)])
-                else:
-                    # GPT2 tokenizers: map each byte back using unicode_to_bytes
-                    byte_vals = [unicode_to_bytes.get(c) for c in token]
-                    if None in byte_vals:
-                        raise RuntimeError(
-                            f"Cannot convert token `{token}`"
-                            f" ({token_idx}) to bytes: {token_str}")
-                    # safe to ignore, since if None in byte_vals,
-                    # an error is thrown.
-                    token_bytes = bytes(byte_vals)  # type: ignore[arg-type]
-            else:
-                token_bytes = token_str.encode('utf-8')
-
-            if token_idx != eos_token_id:
-                vocabulary.setdefault(token_bytes, []).append(token_idx)
-        else:
-            empty_token_ids.append(token_idx)
-
-    return vocabulary
-
-
-def get_vocabulary(tokenizer: AnyTokenizer) -> Vocabulary:
-    """Get the `Vocabulary` object for a given tokenizer.
-    """
-    if hasattr(tokenizer, "_outlines_vocabulary"):
-        return tokenizer._outlines_vocabulary  # type: ignore
-
-    try:
-        if hasattr(
-                tokenizer,
-                "eos_token_id",
-        ) and tokenizer.eos_token_id is not None:
-            eos_token_id = tokenizer.eos_token_id
-        else:
-            raise ValueError(
-                f"Error during guided decoding setup: Tokenizer"
-                f" ({type(tokenizer)}) has no `eos_token_id` property, "
-                "but `eos_token_id` is required for guided decoding"
-                " to work properly.")
-
-        reduced_vocab = _reduced_vocabulary(
-            tokenizer,
-            eos_token_id  #type: ignore
-        )
-        vocabulary = OutlinesVocabulary(Vocabulary(eos_token_id,
-                                                   reduced_vocab))
-        tokenizer._outlines_vocabulary = vocabulary  # type: ignore
-
-        return vocabulary
-    except AttributeError as e:
-        raise ValueError(f"Cannot get the vocabulary of the tokenizer "
-                         f"({type(tokenizer)}). The tokenizer should have a "
-                         "get_vocab method.") from e
-
-
-def get_cache_path() -> str:
-    """Get the context object that contains previously-computed return values"""
-    outlines_cache_dir = os.getenv("OUTLINES_CACHE_DIR")
-    xdg_cache_home = os.getenv("XDG_CACHE_HOME")
-    home_dir = os.path.expanduser("~")
-
-    if outlines_cache_dir:
-        # OUTLINES_CACHE_DIR takes precedence
-        return outlines_cache_dir
-    elif xdg_cache_home:
-        return os.path.join(xdg_cache_home, ".cache", "outlines")
-    # If homedir is "/", we may be inside a container, and thus writing to
-    # root would be problematic, so we fallback to using a tempfile.
-    # Also validate the path exists, since os.path.expanduser does
-    # not garuntee existence.
-    elif os.path.isdir(home_dir) and home_dir != "/":
-        # Default Unix fallback: ~/.cache/outlines
-        return os.path.join(home_dir, ".cache", "outlines")
-    else:
-        import tempfile
-
-        # home_dir may be / inside a docker container without existing user
-        tempdir = tempfile.gettempdir()
-        return os.path.join(tempdir, ".cache", "outlines")
-
-
-def get_cache():
-    """Get the Cache instance to be used for index caching"""
-
-    cache_dir = get_cache_path()
-    if envs.VLLM_V0_USE_OUTLINES_CACHE:
-        logger.warning("Enabling outlines cache. This is an unbounded on-disk "
-                       "cache. It may consume a lot of disk space and should "
-                       "not be used with untrusted clients.")
-        cache = Cache(cache_dir, eviction_policy="none", cull_limit=0)
-        outlines_version = importlib.metadata.version("outlines_core")
-
-        cached_version = cache.get('__version__', None)
-        if cached_version != outlines_version:
-            cache.clear()
-        cache.set('__version__', outlines_version)
-        return cache
-    else:
-        return LRUCache(maxsize=128)
diff --git a/vllm/model_executor/guided_decoding/utils.py b/vllm/model_executor/guided_decoding/utils.py
deleted file mode 100644
index 8fdfa983e120b..0000000000000
--- a/vllm/model_executor/guided_decoding/utils.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import regex as re
-
-
-def has_xgrammar_unsupported_json_features(schema: dict) -> bool:
-    """Check if JSON schema contains features unsupported by xgrammar."""
-
-    def check_object(obj: dict) -> bool:
-        if not isinstance(obj, dict):
-            return False
-
-        # Check for numeric ranges
-        if obj.get("type") in ("integer", "number") and ("multipleOf" in obj):
-            return True
-
-        # Check for array unsupported keywords
-        if obj.get("type") == "array" and any(key in obj for key in [
-                "uniqueItems", "contains", "minContains", "maxContains",
-                "minItems", "maxItems"
-        ]):
-            return True
-
-        # Unsupported keywords for strings
-        if obj.get("type") == "string" and any(
-                key in obj for key in ["minLength", "maxLength", "format"]):
-            return True
-
-        # Unsupported keywords for objects
-        if obj.get("type") == "object" and any(key in obj for key in [
-                "minProperties", "maxProperties", "propertyNames",
-                "patternProperties"
-        ]):
-            return True
-
-        # Recursively check all nested objects and arrays
-        for value in obj.values():
-            if isinstance(value, dict):
-                if check_object(value):
-                    return True
-            elif isinstance(value, list):
-                for item in value:
-                    if isinstance(item, dict) and check_object(item):
-                        return True
-
-        return False
-
-    return check_object(schema)
-
-
-def has_lmf_unsupported_json_features(schema: dict) -> bool:
-    """
-    Check if JSON schema contains features unsupported 
-    by lm_format_enforcer.
-
-    Known issues:
-    - Regex patterns:
-        "grade": {
-            "type": "string",
-            "pattern": "^[A-D]$"  # Regex pattern
-        },
-    """
-
-    def check_object(obj: dict) -> bool:
-        if not isinstance(obj, dict):
-            return False
-
-        # Check for pattern restrictions
-        if "pattern" in obj:
-            return True
-
-        # Recursively check all nested objects and arrays
-        for value in obj.values():
-            if isinstance(value, dict):
-                if check_object(value):
-                    return True
-            elif isinstance(value, list):
-                for item in value:
-                    if isinstance(item, dict) and check_object(item):
-                        return True
-
-        return False
-
-    return check_object(schema)
-
-
-def grammar_is_likely_lark(grammar_str: str) -> bool:
-    """
-    Check if grammar appears to use Lark syntax.
-    
-    Args:
-        grammar_str: Input grammar string
-        
-    Returns:
-        bool: True if grammar appears to be in Lark format, False otherwise
-        
-    Examples:
-        >>> grammar_is_likely_lark("rule: 'abc'")
-        True
-        >>> grammar_is_likely_lark("rule ::= 'abc'")
-        False
-    """
-    if not grammar_str or not isinstance(grammar_str, str):
-        return False
-
-    for line in grammar_str.split('\n'):
-        # Remove both comment styles
-        line = re.sub(r'(#|//).*$', '', line).strip()
-        if not line:
-            continue
-
-        # Look for GBNF rule definition
-        if '::=' in line:
-            return False
-
-    return True
-
-
-def convert_lark_to_gbnf(grammar_str: str) -> str:
-    """
-    Convert a Lark grammar string to GBNF format.
-
-    GBNF reference:
-    https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md
-    Lark grammar reference:
-    https://lark-parser.readthedocs.io/en/latest/grammar.html
-    
-    Args:
-        grammar_str: Input grammar in Lark format
-        
-    Returns:
-        str: Converted grammar in GBNF format
-        
-    Examples:
-        >>> print(convert_lark_to_gbnf("rule: 'hello'"))
-        root ::= rule
-        rule ::= "hello"
-    """
-    if not isinstance(grammar_str, str):
-        raise ValueError(f"Grammar must be a string, got {type(grammar_str)}")
-    if not grammar_str.strip():
-        raise ValueError("Grammar string cannot be empty")
-
-    defined_rules = set()
-    referenced_rules = set()
-    output_lines = []
-
-    def clean_line(line: str) -> str:
-        """Remove comments and whitespace from line."""
-        return re.sub(r'(#|//).*$', '', line).strip()
-
-    def check_quotes(text: str, rule_name: str, line_num: int) -> None:
-        """Validate quote matching in text."""
-        if text.count("'") % 2 != 0 or text.count('"') % 2 != 0:
-            raise ValueError(
-                f"Mismatched quotes in {rule_name} on line {line_num}")
-
-    def extract_references(text: str) -> set:
-        """Extract rule references from text."""
-        # Remove quoted strings and special characters
-        text = re.sub(r'"[^"]*"', '', text)
-        text = re.sub(r'[+*?()|\[\]{}]', ' ', text)
-        return set(re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', text))
-
-    # First pass: Find root rule and validate rule definitions
-    lines = [clean_line(line) for line in grammar_str.split('\n')]
-    first_rule = None
-
-    for line_num, line in enumerate(lines, 1):
-        if not line or line.startswith('|'):
-            continue
-
-        if ':' in line:
-            try:
-                name = line.split(':', 1)[0].strip().strip('?')
-                defined_rules.add(name)
-                if first_rule is None:
-                    first_rule = name
-                if name == 'start':
-                    first_rule = 'start'
-            except IndexError as e:
-                raise ValueError(f"Invalid rule format on line {line_num}. "
-                                 "Expected 'rule_name: definition'") from e
-
-    if not defined_rules:
-        raise ValueError("No valid rules found in grammar")
-
-    # Add root rule
-    output_lines.append(f"root ::= {first_rule}")
-
-    # Second pass: Process rule definitions and alternatives
-    current_rule = None
-    current_definition = []
-
-    for line_num, line in enumerate(lines, 1):
-        if not line:
-            continue
-
-        try:
-            if ':' in line and not line.startswith('|'):
-                # Save previous rule if exists
-                if current_rule:
-                    output_lines.append(
-                        f"{current_rule} ::= {' | '.join(current_definition)}")
-
-                # Process new rule
-                name, definition = line.split(':', 1)
-                current_rule = name.strip().strip('?')
-
-                check_quotes(definition, f"rule '{current_rule}'", line_num)
-                definition = re.sub(r"'([^']*)'", r'"\1"', definition)
-                referenced_rules.update(extract_references(definition))
-                current_definition = [definition.strip()]
-
-            elif line.startswith('|'):
-                if not current_rule:
-                    raise ValueError(f"Alternative '|' on line {line_num} "
-                                     "without a preceding rule definition")
-
-                alt_def = line[1:].strip()
-                check_quotes(alt_def, f"alternative for rule '{current_rule}'",
-                             line_num)
-                alt_def = re.sub(r"'([^']*)'", r'"\1"', alt_def)
-                referenced_rules.update(extract_references(alt_def))
-                current_definition.append(alt_def)
-
-        except ValueError as e:
-            raise ValueError(f"Error on line {line_num}: {str(e)}") from e
-
-    # Add final rule if exists
-    if current_rule:
-        output_lines.append(
-            f"{current_rule} ::= {' | '.join(current_definition)}")
-
-    # Validate all rules are defined
-    undefined_rules = referenced_rules - defined_rules - {'root'}
-    if undefined_rules:
-        raise ValueError("Referenced rules are not defined: "
-                         f"{', '.join(sorted(undefined_rules))}")
-
-    return '\n'.join(output_lines)
diff --git a/vllm/model_executor/guided_decoding/xgrammar_decoding.py b/vllm/model_executor/guided_decoding/xgrammar_decoding.py
deleted file mode 100644
index bdd3a1a9c0a59..0000000000000
--- a/vllm/model_executor/guided_decoding/xgrammar_decoding.py
+++ /dev/null
@@ -1,426 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# noqa: UP007
-from __future__ import annotations
-
-import json
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
-
-import regex as re
-import torch
-
-import vllm.envs
-from vllm.logger import init_logger
-
-try:
-    import xgrammar as xgr
-    xgr_installed = True
-except ImportError:
-    xgr_installed = False
-    pass
-
-from vllm.model_executor.guided_decoding.utils import (convert_lark_to_gbnf,
-                                                       grammar_is_likely_lark)
-from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
-
-if TYPE_CHECKING:
-    from transformers import PreTrainedTokenizer
-
-    from vllm.config import ModelConfig
-    from vllm.reasoning import ReasoningParser
-    from vllm.sampling_params import GuidedDecodingParams
-
-logger = init_logger(__name__)
-
-
-def get_local_xgrammar_guided_decoding_logits_processor(
-        guided_params: GuidedDecodingParams,
-        tokenizer: PreTrainedTokenizer,
-        model_config: ModelConfig,
-        reasoner: ReasoningParser | None,
-        max_threads: int = 8):
-    config = GrammarConfig.from_guided_params(guided_params=guided_params,
-                                              model_config=model_config,
-                                              tokenizer=tokenizer,
-                                              max_threads=max_threads)
-    return XGrammarLogitsProcessor(config, reasoner)
-
-
-@dataclass(frozen=True)
-class TokenizerData:
-    """Immutable container for cached tokenizer data."""
-    metadata: str
-    encoded_vocab: list[str] = field(default_factory=list)
-
-
-class TokenizerDataCache:
-    """Cache manager for tokenizer data to avoid repeated processing."""
-    _cache: dict[int, TokenizerData] = {}
-
-    @classmethod
-    def get_tokenizer_data(
-        cls,
-        tokenizer: PreTrainedTokenizer,
-        /,
-        *,
-        tokenizer_hash: int,
-        vocab_size: int,
-    ) -> TokenizerData:
-
-        if tokenizer_hash not in cls._cache:
-            tokenizer_info = xgr.TokenizerInfo.from_huggingface(
-                tokenizer,
-                # NOTE: We will need to use lm_head's vocab_size
-                # to determine correct special_token_ids for this tokenizer.
-                # See https://github.com/mlc-ai/xgrammar/commit/70c959fb6d9cea75aae33c414763cd0602022d92  # noqa: E501
-                vocab_size=vocab_size,
-            )
-            metadata = json.loads(tokenizer_info.dump_metadata())
-
-            # Vendored from xgrammar logic to get encoded_vocab
-            # https://github.com/mlc-ai/xgrammar/blob/989222175c2a30fb7987d8bcce35bec1bf6817f2/python/xgrammar/tokenizer_info.py#L127 # noqa: E501
-            try:
-                vocab_dict = tokenizer.get_vocab()
-            except AttributeError as e:
-                raise ValueError(
-                    f"Cannot get the vocabulary of the tokenizer "
-                    f"{type(tokenizer)}. The tokenizer should have a "
-                    "get_vocab method.") from e
-
-            # maintain tokenizer's indexing
-            encoded_vocab = [""] * tokenizer_info.vocab_size
-            for token, idx in vocab_dict.items():
-                if idx < tokenizer_info.vocab_size:
-                    encoded_vocab[idx] = token
-
-            if isinstance(tokenizer, MistralTokenizer):
-                # REF: https://github.com/mlc-ai/xgrammar/blob/5e141f6ff1ca02bc31f9e512e68b61f2a8ae88e5/tests/python/test_tokenizer_info.py#L43 # noqa: E501
-                metadata.update({
-                    "vocab_type": xgr.VocabType.BYTE_FALLBACK,
-                    "add_prefix_space": True
-                })
-
-            cls._cache[tokenizer_hash] = TokenizerData(
-                encoded_vocab=encoded_vocab,
-                metadata=json.dumps(metadata),
-            )
-
-        return cls._cache[tokenizer_hash]
-
-
-class GrammarCompilerCache:
-    """
-    Cache for GrammarCompiler instances based on tokenizer.
-
-    This cache reduces the overhead of creating new compiler instances when
-    using the same tokenizer configuration.
-    """
-    _cache: dict[str, xgr.GrammarCompiler] = {}
-
-    @classmethod
-    def get_compiler(cls, config: GrammarConfig) -> xgr.GrammarCompiler:
-        cache_key = str(config.tokenizer_hash)
-
-        if cache_key not in cls._cache:
-            config_data = config.tokenizer_data
-
-            # In TokenizerDataCache.get_tokenizer_data, a serializable
-            # tokenizer_data is created and cached. This data is used to build
-            # a tokenizer_info and create an xgrammar compiler.
-            tokenizer_info = xgr.TokenizerInfo.from_vocab_and_metadata(
-                encoded_vocab=config_data.encoded_vocab,
-                metadata=config_data.metadata,
-            )
-            cache_size = vllm.envs.VLLM_XGRAMMAR_CACHE_MB * 1024 * 1024
-            cls._cache[cache_key] = xgr.GrammarCompiler(
-                tokenizer_info,
-                max_threads=config.max_threads,
-                cache_enabled=True,
-                cache_limit_bytes=cache_size,
-            )
-
-        return cls._cache[cache_key]
-
-
-@dataclass
-class GrammarConfig:
-    """Serializable configuration for grammar compilation"""
-    tokenizer_hash: int
-    tokenizer_data: TokenizerData
-    json_str: str | None = None
-    grammar_str: str | None = None
-    json_object: bool | None = None
-    any_whitespace: bool = True
-    regex_str: str | None = None
-    max_threads: int = 8
-
-    @classmethod
-    def from_guided_params(cls,
-                           guided_params: GuidedDecodingParams,
-                           model_config: ModelConfig,
-                           tokenizer: PreTrainedTokenizer,
-                           max_threads: int = 8) -> GrammarConfig:
-
-        tokenizer_hash = hash(tokenizer)
-        tokenizer_data = TokenizerDataCache.get_tokenizer_data(
-            tokenizer,
-            tokenizer_hash=tokenizer_hash,
-            vocab_size=model_config.hf_text_config.vocab_size,
-        )
-
-        if guided_params.json:
-            if not isinstance(guided_params.json, str):
-                json_str = json.dumps(guided_params.json)
-            else:
-                json_str = guided_params.json
-
-            any_whitespace = not guided_params.disable_any_whitespace
-
-            # Check and log if model with xgrammar and whitespace have history
-            # of runaway generation of whitespaces.
-            # References:
-            # https://github.com/vllm-project/vllm/pull/12744
-            # https://github.com/mlc-ai/xgrammar/issues/212
-            model_with_warn = None
-
-            if 'Mistral' in model_config.model:
-                model_with_warn = 'Mistral'
-            elif 'Qwen' in model_config.model:
-                model_with_warn = 'Qwen'
-
-            if model_with_warn is not None and any_whitespace:
-                logger.info_once(
-                    "%s model detected, consider setting `disable_any_whitespace` to prevent runaway generation of whitespaces.",  # noqa: E501
-                    model_with_warn,
-                )
-            # Validate the schema and raise ValueError here if it is invalid.
-            # This is to avoid exceptions in model execution, which will crash
-            # the engine worker process.
-            try:
-                xgr.Grammar.from_json_schema(json_str,
-                                             any_whitespace=any_whitespace)
-            except RuntimeError as err:
-                raise ValueError(str(err)) from err
-
-            return cls(json_str=json_str,
-                       tokenizer_hash=tokenizer_hash,
-                       max_threads=max_threads,
-                       tokenizer_data=tokenizer_data,
-                       any_whitespace=any_whitespace)
-        elif guided_params.grammar:
-            # XGrammar only supports GBNF grammars, so we must convert Lark
-            if grammar_is_likely_lark(guided_params.grammar):
-                try:
-                    grammar_str = convert_lark_to_gbnf(guided_params.grammar)
-                except ValueError as e:
-                    raise ValueError(
-                        "Failed to convert the grammar from Lark to GBNF. "
-                        "Please either use GBNF grammar directly or specify"
-                        " --guided-decoding-backend=outlines.\n"
-                        f"Conversion error: {str(e)}") from e
-            else:
-                grammar_str = guided_params.grammar
-
-            # Validate the grammar and raise ValueError here if it is invalid.
-            # This is to avoid exceptions in model execution, which will crash
-            # the engine worker process.
-            try:
-                xgr.Grammar.from_ebnf(grammar_str)
-            except RuntimeError as err:
-                raise ValueError(str(err)) from err
-
-            return cls(grammar_str=grammar_str,
-                       tokenizer_hash=tokenizer_hash,
-                       max_threads=max_threads,
-                       tokenizer_data=tokenizer_data)
-        elif guided_params.json_object:
-            return cls(
-                json_object=True,
-                tokenizer_hash=tokenizer_hash,
-                max_threads=max_threads,
-                tokenizer_data=tokenizer_data,
-            )
-        elif guided_params.choice:
-            choice_str = GrammarConfig.choice_as_grammar(guided_params.choice)
-            try:
-                xgr.Grammar.from_ebnf(choice_str)
-            except RuntimeError as err:
-                raise ValueError(str(err)) from err
-
-            return cls(
-                grammar_str=choice_str,
-                tokenizer_hash=tokenizer_hash,
-                max_threads=max_threads,
-                tokenizer_data=tokenizer_data,
-            )
-        elif guided_params.regex:
-            return cls(
-                regex_str=guided_params.regex,
-                tokenizer_hash=tokenizer_hash,
-                max_threads=max_threads,
-                tokenizer_data=tokenizer_data,
-            )
-        else:
-            raise ValueError(
-                "Currently only support JSON and EBNF grammar mode for xgrammar"
-            )
-
-    @staticmethod
-    def escape_ebnf_string(s: str) -> str:
-        """Escape special characters in a EBNF string."""
-        # Escape double quotes and backslashes
-        return re.sub(r'(["\\])', r'\\\1', s)
-
-    @staticmethod
-    def choice_as_grammar(choice: list[str] | None) -> str:
-        if choice is None:
-            raise ValueError("Choice is not set")
-        escaped_choices = (GrammarConfig.escape_ebnf_string(c) for c in choice)
-        grammar = ('root ::= ' + ' | '.join(f'"{c}"' for c in escaped_choices))
-        return grammar
-
-    @staticmethod
-    def tokenizer_info(tokenizer_data: TokenizerData) -> xgr.TokenizerInfo:
-        return xgr.TokenizerInfo.from_vocab_and_metadata(
-            encoded_vocab=tokenizer_data.encoded_vocab,
-            metadata=tokenizer_data.metadata,
-        )
-
-
-@dataclass
-class XGrammarLogitsProcessor:
-    """Wrapper class to support pickle protocol"""
-    config: GrammarConfig
-    reasoner: ReasoningParser | None = None
-
-    ctx: xgr.CompiledGrammar | None = None
-    tokenizer_info: xgr.TokenizerInfo = None  # type: ignore[assignment]
-    token_bitmask: torch.Tensor = None  # type: ignore[assignment]
-    matchers: list[xgr.GrammarMatcher] = field(default_factory=list)
-    batch_size: int = field(default=1)
-    prefilled: bool = field(default=False)
-
-    def __post_init__(self):
-        if self.tokenizer_info is None:
-            self.tokenizer_info = self.config.tokenizer_info(
-                self.config.tokenizer_data)
-
-    def __getstate__(self) -> dict[str, Any]:
-        return {'config': self.config, 'reasoner': self.reasoner}
-
-    def __setstate__(self, state: dict[str, Any]):
-        self.config = state['config']
-        self.reasoner = state['reasoner']
-
-        self.tokenizer_info = GrammarConfig.tokenizer_info(
-            self.config.tokenizer_data)
-        self.ctx = None
-        self.matchers = []
-        self.batch_size = 1
-        self.token_bitmask = None  # type: ignore[assignment]
-        self.prefilled = False
-
-    def _ensure_ctx(self):
-        """Lazily initialize the processor in the worker process"""
-        if self.ctx is None:
-            compiler = GrammarCompilerCache.get_compiler(self.config)
-            if self.config.json_str is not None:
-                any_whitespace = self.config.any_whitespace
-                self.ctx = compiler\
-                    .compile_json_schema(self.config.json_str,
-                                         any_whitespace=any_whitespace)
-            elif self.config.grammar_str is not None:
-                self.ctx = compiler.compile_grammar(self.config.grammar_str)
-            elif self.config.json_object:
-                any_whitespace = self.config.any_whitespace
-                self.ctx = compiler\
-                    .compile_json_schema('{"type": "object"}',
-                                         any_whitespace=any_whitespace)
-            elif self.config.regex_str:
-                self.ctx = compiler.compile_regex(self.config.regex_str)
-            else:
-                raise ValueError(
-                    "Invalid configuration for xgrammar logits processor")
-
-    def __call__(self, input_ids: list[int],
-                 scores: torch.Tensor) -> torch.Tensor:
-
-        # Skip the structured logits processing if reasoning is not finished.
-        # reasoner is not None only when `--reasoning-parser` is set.
-        if self.reasoner is not None and \
-        not self.reasoner.is_reasoning_end(
-                input_ids):
-            return scores
-
-        if self.ctx is None:
-            self._ensure_ctx()
-
-        if len(self.matchers) == 0:
-            self.matchers = [
-                xgr.GrammarMatcher(self.ctx) for _ in range(self.batch_size)
-            ]
-            self.token_bitmask = xgr.allocate_token_bitmask(
-                self.batch_size, self.tokenizer_info.vocab_size)
-
-        if not self.prefilled:
-            # Have not sampled a token yet
-            self.prefilled = True
-        else:
-            for i, matcher in enumerate(self.matchers):
-                if not matcher.is_terminated():
-                    sampled_token = input_ids[-1]
-                    assert self.matchers[i].accept_token(sampled_token)
-
-        for i, matcher in enumerate(self.matchers):
-            if not matcher.is_terminated():
-                # @ubospica: ideally, fill_next_token_bitmask should be
-                # parallelized with model decoding
-                # See https://github.com/vllm-project/vllm/pull/10785/files#r1864278303
-                matcher.fill_next_token_bitmask(self.token_bitmask, i)
-
-        # token_bitmask is a CPU tensor for use with accept_token and
-        # fill_next_token_bitmask so we move it to the device of scores
-        device_type = scores.device.type
-        dtype = scores.dtype
-        if device_type != "cuda":
-            # xgrammar on cpu only supports float32 scores
-            # see: https://github.com/mlc-ai/xgrammar/blob/c1b64920cad24f44f235778c1c00bb52d57da01a/python/xgrammar/kernels/apply_token_bitmask_inplace_cpu.py#L22
-            scores = scores.to("cpu").float().unsqueeze(0)
-
-        # Note: In this method, if the tensors have different dimensions
-        # on CPU device fails, but on GPU it runs without error. Hence the
-        # unsqueeze above for scores, to match the token bitmask shape
-        xgr.apply_token_bitmask_inplace(
-            scores, self.token_bitmask.to(scores.device, non_blocking=True))
-        if device_type != "cuda":
-            scores = scores.to(dtype).to(device_type).squeeze()
-
-        return scores
-
-    def clone(self) -> XGrammarLogitsProcessor:
-        """Create a new instance with shared compiled grammar
-          but separate state"""
-        new_processor = XGrammarLogitsProcessor(self.config, self.reasoner,
-                                                None, self.tokenizer_info)
-
-        # Share the compiled grammar context (immutable after compilation)
-        new_processor.ctx = self.ctx
-
-        # Create fresh matchers for the new sequence
-        if self.ctx is not None:
-            new_processor.matchers = [
-                xgr.GrammarMatcher(self.ctx) for _ in range(self.batch_size)
-            ]
-
-        # Create a new token bitmask with the same size
-        if hasattr(self, 'token_bitmask') and self.token_bitmask is not None:
-            new_processor.token_bitmask = self.token_bitmask
-
-        # Copy simple attributes
-        new_processor.batch_size = self.batch_size
-        # Reset prefilled state for new sequence
-        new_processor.prefilled = False
-
-        return new_processor

From 61a6905ab036fd00eafdb1b0ca130d5feccfe686 Mon Sep 17 00:00:00 2001
From: Jee Jee Li <pandaleefree@gmail.com>
Date: Tue, 29 Jul 2025 18:25:07 +0800
Subject: [PATCH 011/224] [Model] Refactor JambaForCausalLM (#21394)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
---
 vllm/model_executor/models/jamba.py | 231 ++++++++++++++--------------
 1 file changed, 116 insertions(+), 115 deletions(-)

diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py
index 34281b2e99ee8..263f4c8379cf2 100644
--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -25,6 +25,7 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.llama import LlamaMLP as JambaMLP
 from vllm.model_executor.models.mamba_cache import (MambaCacheManager,
                                                     MambaCacheParams)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
@@ -33,7 +34,7 @@ from vllm.utils import LayerBlockType
 
 from .interfaces import (HasInnerState, IsHybrid, SupportsLoRA, SupportsPP,
                          SupportsV0Only)
-from .utils import (is_pp_missing_parameter,
+from .utils import (AutoWeightsLoader, WeightsMapper, is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers,
                     maybe_prefix)
 
@@ -87,23 +88,6 @@ class JambaMoE(nn.Module):
         return hidden_states.view(orig_shape)
 
 
-class JambaMLP(JambaMoE):
-
-    def __init__(self,
-                 config: JambaConfig,
-                 params_dtype: Optional[torch.dtype] = None,
-                 tp_size: Optional[int] = None,
-                 quant_config: Optional[QuantizationConfig] = None,
-                 prefix: str = ""):
-        super().__init__(config,
-                         num_experts=1,
-                         top_k=1,
-                         params_dtype=params_dtype,
-                         tp_size=tp_size,
-                         quant_config=quant_config,
-                         prefix=prefix)
-
-
 class JambaMambaDecoderLayer(nn.Module):
 
     def __init__(self,
@@ -132,10 +116,20 @@ class JambaMambaDecoderLayer(nn.Module):
                                 )
 
         num_experts = config.layers_num_experts[layer_idx]
-        ffn_layer_class = JambaMoE if num_experts > 1 else JambaMLP
-        self.feed_forward = ffn_layer_class(config,
-                                            quant_config=quant_config,
-                                            prefix=f"{prefix}.feed_forward")
+        if num_experts > 1:
+            self.feed_forward = JambaMoE(
+                config,
+                quant_config=quant_config,
+                prefix=f"{prefix}.feed_forward",
+            )
+        else:
+            self.feed_forward = JambaMLP(
+                config.hidden_size,
+                config.intermediate_size,
+                config.hidden_act,
+                quant_config=quant_config,
+                prefix=f"{prefix}.feed_forward",
+            )
         self.input_layernorm = RMSNorm(config.hidden_size,
                                        eps=config.rms_norm_eps)
         self.pre_ff_layernorm = RMSNorm(config.hidden_size,
@@ -216,10 +210,20 @@ class JambaAttentionDecoderLayer(nn.Module):
         )
 
         num_experts = config.layers_num_experts[layer_idx]
-        ffn_layer_class = JambaMoE if num_experts > 1 else JambaMLP
-        self.feed_forward = ffn_layer_class(config,
-                                            quant_config=quant_config,
-                                            prefix=f"{prefix}.feed_forward")
+        if num_experts > 1:
+            self.feed_forward = JambaMoE(
+                config,
+                quant_config=quant_config,
+                prefix=f"{prefix}.feed_forward",
+            )
+        else:
+            self.feed_forward = JambaMLP(
+                config.hidden_size,
+                config.intermediate_size,
+                config.hidden_act,
+                quant_config=quant_config,
+                prefix=f"{prefix}.feed_forward",
+            )
         self.input_layernorm = RMSNorm(config.hidden_size,
                                        eps=config.rms_norm_eps)
         self.pre_ff_layernorm = RMSNorm(config.hidden_size,
@@ -359,15 +363,97 @@ class JambaModel(nn.Module):
         hidden_states, _ = self.final_layernorm(hidden_states, residual)
         return hidden_states
 
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        # Params for weights, fp8 weight scales, fp8 activation scales
+        # (param_name, weight_name, expert_id, shard_id)
+        return FusedMoE.make_expert_params_mapping(
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=self.config.num_experts)
+
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("qkv_proj", "q_proj", "q"),
+            ("qkv_proj", "k_proj", "k"),
+            ("qkv_proj", "v_proj", "v"),
+            (".gate_up_proj", ".gate_proj", 0),
+            (".gate_up_proj", ".up_proj", 1),
+        ]
+
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        expert_params_mapping = self.get_expert_mapping()
+        for name, loaded_weight in weights:
+            if "rotary_emb.inv_freq" in name:
+                continue
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                if 'experts' in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+                # Skip loading extra bias for GPTQ models.
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                # Skip layers on other devices.
+                if is_pp_missing_parameter(name, self):
+                    continue
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                for (
+                        param_name,
+                        weight_name,
+                        expert_id,
+                        shard_id,
+                ) in expert_params_mapping:
+                    if weight_name not in name:
+                        continue
+
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    name = name.replace(weight_name, param_name)
+                    param = params_dict[name]
+                    weight_loader = param.weight_loader
+                    weight_loader(param,
+                                  loaded_weight,
+                                  name,
+                                  shard_id=shard_id,
+                                  expert_id=expert_id)
+                    break
+                else:
+                    # Skip loading extra bias for GPTQ models.
+                    if name.endswith(".bias") and name not in params_dict:
+                        continue
+                    if is_pp_missing_parameter(name, self):
+                        continue
+
+                    param = params_dict[name]
+                    weight_loader = getattr(param, "weight_loader",
+                                            default_weight_loader)
+                    weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+        return loaded_params
+
 
 class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
                        IsHybrid, SupportsV0Only):
+    hf_to_vllm_mapper = WeightsMapper(orig_to_new_substr={
+        ".self_attn.": ".",
+        ".A_log": ".A"
+    }, )
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
             "k_proj",
             "v_proj",
         ],
+        "gate_up_proj": ["gate_proj", "up_proj"],
         "in_proj": ["in_proj"],
     }
 
@@ -468,96 +554,11 @@ class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
 
     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
-        stacked_params_mapping = [
-            # (param_name, shard_name, shard_id)
-            ("qkv_proj", "q_proj", "q"),
-            ("qkv_proj", "k_proj", "k"),
-            ("qkv_proj", "v_proj", "v"),
-        ]
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
 
-        # Params for weights, fp8 weight scales, fp8 activation scales
-        # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
-            ckpt_gate_proj_name="gate_proj",
-            ckpt_down_proj_name="down_proj",
-            ckpt_up_proj_name="up_proj",
-            num_experts=self.config.num_experts)
-
-        params_dict = dict(self.named_parameters())
-        loaded_params: set[str] = set()
-        for name, loaded_weight in weights:
-            if "rotary_emb.inv_freq" in name:
-                continue
-
-            if "A_log" in name:
-                name = name.replace("A_log", "A")
-
-            if ".self_attn." in name:
-                name = name.replace(".self_attn", "")
-
-            if "feed_forward" in name and not _is_moe_layer(name):
-                ## map MLP layers to expert with ID=0
-                name = name.replace("feed_forward", "feed_forward.experts.0")
-
-            for param_name, weight_name, shard_id in stacked_params_mapping:
-                if weight_name not in name:
-                    continue
-                if 'experts' in name:
-                    continue
-                name = name.replace(weight_name, param_name)
-                # Skip loading extra bias for GPTQ models.
-
-                if name.endswith(".bias") and name not in params_dict:
-                    continue
-                # Skip layers on other devices.
-                if is_pp_missing_parameter(name, self):
-                    continue
-                param = params_dict[name]
-                weight_loader = param.weight_loader
-                weight_loader(param, loaded_weight, shard_id)
-                break
-            else:
-                for (
-                        param_name,
-                        weight_name,
-                        expert_id,
-                        shard_id,
-                ) in expert_params_mapping:
-                    if weight_name not in name:
-                        continue
-
-                    if is_pp_missing_parameter(name, self):
-                        continue
-                    name = name.replace(weight_name, param_name)
-                    param = params_dict[name]
-                    weight_loader = param.weight_loader
-                    weight_loader(param,
-                                  loaded_weight,
-                                  name,
-                                  shard_id=shard_id,
-                                  expert_id=expert_id)
-                    break
-                else:
-                    # Skip loading extra bias for GPTQ models.
-                    if name.endswith(".bias") and name not in params_dict:
-                        continue
-                    if is_pp_missing_parameter(name, self):
-                        continue
-
-                    param = params_dict[name]
-                    weight_loader = getattr(param, "weight_loader",
-                                            default_weight_loader)
-                    weight_loader(param, loaded_weight)
-            loaded_params.add(name)
-        return loaded_params
-
-
-def _is_moe_layer(name: str):
-    return any(
-        [experts_name in name for experts_name in [
-            "experts",
-            "router",
-        ]])
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        return self.model.get_expert_mapping()
 
 
 class JambaForSequenceClassification(JambaForCausalLM):

From 2470419119aa5bc2734b4b8972bbfa348ccdc8b1 Mon Sep 17 00:00:00 2001
From: Kay Yan <kay.yan@daocloud.io>
Date: Tue, 29 Jul 2025 19:56:27 +0800
Subject: [PATCH 012/224] [Docs] Fix the outdated URL for installing from vLLM
 binaries (#21523)

Signed-off-by: Kay Yan <kay.yan@daocloud.io>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 docs/contributing/ci/update_pytorch_version.md    | 3 +--
 docs/getting_started/installation/gpu/cuda.inc.md | 8 ++++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/docs/contributing/ci/update_pytorch_version.md b/docs/contributing/ci/update_pytorch_version.md
index 5046db11a4715..699d0531ac768 100644
--- a/docs/contributing/ci/update_pytorch_version.md
+++ b/docs/contributing/ci/update_pytorch_version.md
@@ -57,8 +57,7 @@ cc the PyTorch release team to initiate discussion on how to address them.
 
 ## Update CUDA version
 
-The PyTorch release matrix includes both stable and experimental [CUDA versions](https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix). Due to limitations, only the latest stable CUDA version (for example,
-`torch2.7.0+cu12.6`) is uploaded to PyPI. However, vLLM may require a different CUDA version,
+The PyTorch release matrix includes both stable and experimental [CUDA versions](https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix). Due to limitations, only the latest stable CUDA version (for example, torch `2.7.1+cu126`) is uploaded to PyPI. However, vLLM may require a different CUDA version,
 such as 12.8 for Blackwell support.
 This complicates the process as we cannot use the out-of-the-box
 `pip install torch torchvision torchaudio` command. The solution is to use
diff --git a/docs/getting_started/installation/gpu/cuda.inc.md b/docs/getting_started/installation/gpu/cuda.inc.md
index 5ca5296d0a657..5298c22c8435e 100644
--- a/docs/getting_started/installation/gpu/cuda.inc.md
+++ b/docs/getting_started/installation/gpu/cuda.inc.md
@@ -38,10 +38,10 @@ We recommend leveraging `uv` to [automatically select the appropriate PyTorch in
 As of now, vLLM's binaries are compiled with CUDA 12.8 and public PyTorch release versions by default. We also provide vLLM binaries compiled with CUDA 12.6, 11.8, and public PyTorch release versions:
 
 ```bash
-# Install vLLM with CUDA 11.8.
-export VLLM_VERSION=0.6.1.post1
-export PYTHON_VERSION=312
-uv pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux1_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
+# Install vLLM with a specific CUDA version (e.g., 11.8 or 12.6).
+export VLLM_VERSION=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq -r .tag_name | sed 's/^v//')
+export CUDA_VERSION=118 # or 126
+uv pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cu${CUDA_VERSION}-cp38-abi3-manylinux1_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu${CUDA_VERSION}
 ```
 
 [](){ #install-the-latest-code }

From 755fa8b657e3666cc93b08a6d2b9a50d0f46c37e Mon Sep 17 00:00:00 2001
From: Chen Zhang <zhangch99@outlook.com>
Date: Tue, 29 Jul 2025 04:58:29 -0700
Subject: [PATCH 013/224] [KVCache] Make KVCacheSpec hashable (#21791)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
---
 tests/v1/core/test_kv_cache_utils.py          | 34 +++++++-
 .../v1/e2e/test_correctness_sliding_window.py |  8 +-
 vllm/v1/core/kv_cache_coordinator.py          | 31 ++++---
 vllm/v1/core/kv_cache_utils.py                | 35 ++++----
 vllm/v1/kv_cache_interface.py                 | 80 +++++++------------
 5 files changed, 100 insertions(+), 88 deletions(-)

diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py
index ebe3a30e3352d..e9c6f1f95cd71 100644
--- a/tests/v1/core/test_kv_cache_utils.py
+++ b/tests/v1/core/test_kv_cache_utils.py
@@ -17,7 +17,7 @@ from vllm.v1.core.kv_cache_utils import (
     estimate_max_model_len, generate_block_hash_extra_keys,
     get_kv_cache_config, get_max_concurrency_for_kv_cache_config,
     hash_block_tokens, hash_request_tokens, init_none_hash,
-    unify_kv_cache_configs)
+    is_kv_cache_type_uniform, unify_kv_cache_configs)
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                         KVCacheGroupSpec, KVCacheTensor,
                                         SlidingWindowSpec)
@@ -685,6 +685,38 @@ def test_merge_kv_cache_spec():
     assert merged_layer_spec.sliding_window == 1
 
 
+def test_is_kv_cache_type_uniform():
+    kv_cache_spec = {
+        "layer_1": new_kv_cache_spec(num_kv_heads=32),
+        "layer_2": new_kv_cache_spec(num_kv_heads=32),
+    }
+    assert is_kv_cache_type_uniform(kv_cache_spec)
+
+    kv_cache_spec = {
+        "layer_1": new_kv_cache_spec(num_kv_heads=32),
+        "layer_2": new_kv_cache_spec(num_kv_heads=32, sliding_window=1),
+    }
+    assert is_kv_cache_type_uniform(kv_cache_spec)
+
+    kv_cache_spec = {
+        "layer_1": new_kv_cache_spec(num_kv_heads=32),
+        "layer_2": new_sliding_window_spec(num_kv_heads=32, sliding_window=1),
+    }
+    assert not is_kv_cache_type_uniform(kv_cache_spec)
+
+    kv_cache_spec = {
+        "layer_1": new_sliding_window_spec(num_kv_heads=32, sliding_window=1),
+        "layer_2": new_sliding_window_spec(num_kv_heads=32, sliding_window=1),
+    }
+    assert is_kv_cache_type_uniform(kv_cache_spec)
+
+    kv_cache_spec = {
+        "layer_1": new_sliding_window_spec(num_kv_heads=32, sliding_window=1),
+        "layer_2": new_sliding_window_spec(num_kv_heads=32, sliding_window=2),
+    }
+    assert not is_kv_cache_type_uniform(kv_cache_spec)
+
+
 @pytest.mark.parametrize(
     ("model_id", "max_model_len", "want_estimated_max_len"), [
         ("Qwen/Qwen1.5-7B", 16385, 16384),
diff --git a/tests/v1/e2e/test_correctness_sliding_window.py b/tests/v1/e2e/test_correctness_sliding_window.py
index 277ea3c838505..4dfe1d3bb33fa 100644
--- a/tests/v1/e2e/test_correctness_sliding_window.py
+++ b/tests/v1/e2e/test_correctness_sliding_window.py
@@ -30,7 +30,9 @@ model_config = {
     ])
 @pytest.mark.parametrize("batch_size", [5])
 @pytest.mark.parametrize("seed", [1])
-def test_sliding_window_retrieval(monkeypatch, model, batch_size, seed):
+@pytest.mark.parametrize("disable_hybrid_kv_cache_manager", [True, False])
+def test_sliding_window_retrieval(monkeypatch, model, batch_size, seed,
+                                  disable_hybrid_kv_cache_manager):
     """
     The test does a bunch of assignments "x1 = 10\nx2 = 33\n..." and then
     asks for value of one of them (which is outside the sliding window).
@@ -42,7 +44,9 @@ def test_sliding_window_retrieval(monkeypatch, model, batch_size, seed):
 
         test_config = model_config[model]
 
-        llm = LLM(model=model)
+        llm = LLM(
+            model=model,
+            disable_hybrid_kv_cache_manager=disable_hybrid_kv_cache_manager)
         sampling_params = SamplingParams(temperature=0.0, max_tokens=100)
 
         prompts, answer, indices = prep_prompts(batch_size,
diff --git a/vllm/v1/core/kv_cache_coordinator.py b/vllm/v1/core/kv_cache_coordinator.py
index de72e60434ad7..0cce2ec81e08a 100644
--- a/vllm/v1/core/kv_cache_coordinator.py
+++ b/vllm/v1/core/kv_cache_coordinator.py
@@ -7,7 +7,8 @@ from vllm.v1.core.block_pool import BlockPool
 from vllm.v1.core.kv_cache_utils import BlockHash, KVCacheBlock
 from vllm.v1.core.single_type_kv_cache_manager import (
     FullAttentionManager, get_manager_for_kv_cache_spec)
-from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheConfig
+from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
+                                        KVCacheSpec)
 from vllm.v1.request import Request
 
 
@@ -258,44 +259,40 @@ class HybridKVCacheCoordinator(KVCacheCoordinator):
         one of them is full attention. Then, split the kv cache groups into full
         attention groups and other groups.
         """
-        full_attention_type_id: Optional[str] = None
-        other_type_id: Optional[str] = None
+        full_attention_spec: Optional[FullAttentionSpec] = None
+        other_spec: Optional[KVCacheSpec] = None
         self.full_attention_group_ids: list[int] = []
         self.other_group_ids: list[int] = []
         for i, g in enumerate(self.kv_cache_config.kv_cache_groups):
             if isinstance(g.kv_cache_spec, FullAttentionSpec):
-                if full_attention_type_id is None:
-                    full_attention_type_id = g.kv_cache_spec.type_id
+                if full_attention_spec is None:
+                    full_attention_spec = g.kv_cache_spec
                 else:
-                    assert full_attention_type_id == g.kv_cache_spec.type_id, (
+                    assert full_attention_spec == g.kv_cache_spec, (
                         "HybridKVCacheCoordinator assumes exactly one type of "
                         "full attention groups now.")
                 self.full_attention_group_ids.append(i)
             else:
-                if other_type_id is None:
-                    other_type_id = g.kv_cache_spec.type_id
+                if other_spec is None:
+                    other_spec = g.kv_cache_spec
                 else:
-                    assert other_type_id == g.kv_cache_spec.type_id, (
+                    assert other_spec == g.kv_cache_spec, (
                         "HybridKVCacheCoordinator assumes "
                         "exactly one other type of groups now.")
                 self.other_group_ids.append(i)
 
-        assert full_attention_type_id is not None, (
+        assert full_attention_spec is not None, (
             "HybridKVCacheCoordinator assumes exactly one type of full "
             "attention groups now.")
-        assert other_type_id is not None, (
+        assert other_spec is not None, (
             "HybridKVCacheCoordinator assumes exactly one type of other "
             "groups now.")
 
         self.full_attention_manager_cls = FullAttentionManager
         self.other_attention_cls = self.single_type_managers[
             self.other_group_ids[0]].__class__
-
-        self.full_attention_spec = self.kv_cache_config.kv_cache_groups[
-            self.full_attention_group_ids[0]].kv_cache_spec
-        self.other_spec = self.kv_cache_config.kv_cache_groups[
-            self.other_group_ids[0]].kv_cache_spec
-
+        self.full_attention_spec = full_attention_spec
+        self.other_spec = other_spec
         self.full_attention_block_size = self.full_attention_spec.block_size
         self.other_block_size = self.other_spec.block_size
 
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 5b0218640a8c8..3a72ac271afa6 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -5,7 +5,7 @@
 import os
 from collections import defaultdict, deque
 from collections.abc import Iterable, Sequence
-from dataclasses import dataclass
+from dataclasses import astuple, dataclass
 from typing import Any, Callable, NamedTuple, Optional
 
 from vllm.config import VllmConfig
@@ -727,7 +727,9 @@ def create_kv_cache_group_specs(
 
 def is_kv_cache_type_uniform(kv_cache_spec: dict[str, KVCacheSpec]) -> bool:
     """
-    Whether all layers in the given KVCacheSpec have the same type of KV cache.
+    Whether the KV cache specs of all layers can be merged into a single spec.
+    Note that we regard FullAttentionSpec with and without sliding window as
+    the same type.
 
     Args:
         kv_cache_spec: The kv cache spec of each attention layer in the model
@@ -736,8 +738,12 @@ def is_kv_cache_type_uniform(kv_cache_spec: dict[str, KVCacheSpec]) -> bool:
         True if all layers have the same type, False otherwise.
     """
 
-    layer_keys = set(layer.type_id for layer in kv_cache_spec.values())
-    return len(layer_keys) == 1
+    try:
+        kv_cache_spec_values = list(kv_cache_spec.values())
+        _ = kv_cache_spec_values[0].merge(kv_cache_spec_values)
+    except AssertionError:
+        return False
+    return True
 
 
 def get_max_concurrency_for_kv_cache_config(
@@ -928,12 +934,12 @@ def _get_kv_cache_config_uniform_page_size(
     Returns:
         The generated KVCacheConfig
     """
-    # Group all layers by type_id.
+    # Group all layers by kv_cache_spec.
     # E.g., 2 full attention layers and 3 sliding window attention layers,
     # -> (full.0, full.1), (sw.0, sw.1, sw.2).
-    same_type_layers: dict[str, list[str]] = defaultdict(list)
+    same_type_layers: dict[KVCacheSpec, list[str]] = defaultdict(list)
     for layer_name, layer_spec in kv_cache_spec.items():
-        same_type_layers[layer_spec.type_id].append(layer_name)
+        same_type_layers[layer_spec].append(layer_name)
 
     # Split each group into smaller groups, to make the number of layers in each
     # group identical. Add padding to the last group of each type if necessary.
@@ -1017,12 +1023,7 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]):
         kv_cache_spec: The kv cache spec of each attention layer in the model
     """
 
-    def is_hybrid(kv_cache_spec: dict[str, KVCacheSpec]) -> bool:
-        type_ids = set(layer_spec.type_id
-                       for layer_spec in kv_cache_spec.values())
-        return len(type_ids) > 1
-
-    if not is_hybrid(kv_cache_spec):
+    if is_kv_cache_type_uniform(kv_cache_spec):
         return
 
     logger.warning(
@@ -1060,7 +1061,7 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]):
                     attention_chunk_size=spec.attention_chunk_size,
                 )
 
-    if is_hybrid(kv_cache_spec):
+    if not is_kv_cache_type_uniform(kv_cache_spec):
         raise ValueError("Hybrid KV cache manager is disabled but failed to "
                          "convert the KV cache specs to one unified type.")
 
@@ -1119,11 +1120,11 @@ def unify_kv_cache_configs(kv_cache_configs: list[KVCacheConfig]):
             in-place modified to make them consistent.
     """
 
-    # Sort the kv cache groups by the type_id of their KV cache spec.
+    # Sort the kv cache groups by their KV cache spec.
     # This can avoid the inconsistency caused by the order of groups.
     for kv_cache_config in kv_cache_configs:
-        kv_cache_config.kv_cache_groups.sort(
-            key=lambda x: x.kv_cache_spec.type_id)
+        kv_cache_config.kv_cache_groups.sort(key=lambda x: (type(
+            x.kv_cache_spec).__name__, astuple(x.kv_cache_spec)))
 
     # Verify that the groups of each rank are the same.
     for kv_cache_config in kv_cache_configs[1:]:
diff --git a/vllm/v1/kv_cache_interface.py b/vllm/v1/kv_cache_interface.py
index 1da5230116d26..4ff96f9786b88 100644
--- a/vllm/v1/kv_cache_interface.py
+++ b/vllm/v1/kv_cache_interface.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import copy
-from dataclasses import dataclass
+from dataclasses import dataclass, fields
 from math import prod
 from typing import Optional
 
@@ -16,7 +16,7 @@ from vllm.utils import cdiv, get_dtype_size
 logger = init_logger(__name__)
 
 
-@dataclass
+@dataclass(frozen=True)
 class KVCacheSpec:
     """
     A base class for specifying the KV cache format of one layer.
@@ -25,20 +25,6 @@ class KVCacheSpec:
     # number of tokens in a block
     block_size: int
 
-    @property
-    def type_id(self) -> str:
-        """
-        The type identifier of this KV cache.
-        Return different strings for layers with different KV cache type (e.g.,
-        different number of tokens like full attention vs sliding window
-        attention, different KV cache size per token like layers with different
-        number of heads)
-
-        Returns:
-            The type identifier of this KV cache.
-        """
-        raise NotImplementedError
-
     @property
     def page_size_bytes(self) -> int:
         """
@@ -63,13 +49,12 @@ class KVCacheSpec:
         """
         Merge a list of KVCacheSpec objects into a single KVCacheSpec object.
         """
-        assert all(spec.type_id == specs[0].type_id for spec in specs[1:]), (
-            "All layers in the same KV cache group must share the same "
-            "type_id.")
+        assert all(spec == specs[0] for spec in specs[1:]), (
+            "All layers in the same KV cache group must be the same.")
         return copy.deepcopy(specs[0])
 
 
-@dataclass
+@dataclass(frozen=True)
 class AttentionSpec(KVCacheSpec):
     num_kv_heads: int
     head_size: int
@@ -84,7 +69,7 @@ class AttentionSpec(KVCacheSpec):
                 * get_dtype_size(self.dtype)
 
 
-@dataclass
+@dataclass(frozen=True)
 class FullAttentionSpec(AttentionSpec):
     sliding_window: Optional[int] = None
     attention_chunk_size: Optional[int] = None
@@ -98,10 +83,6 @@ class FullAttentionSpec(AttentionSpec):
     Default to None for not using sliding window attention.
     """
 
-    @property
-    def type_id(self) -> str:
-        return f"full_attention_{self.block_size}_{self.page_size_bytes}"
-
     def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
         max_model_len = vllm_config.model_config.max_model_len
         return cdiv(max_model_len, self.block_size) * self.page_size_bytes
@@ -123,15 +104,28 @@ class FullAttentionSpec(AttentionSpec):
         Merge a list of FullAttentionSpec objects into a single 
         FullAttentionSpec object.
         """
-        merged_spec = super().merge(specs)
+        assert all(isinstance(spec, FullAttentionSpec) for spec in specs), (
+            "All attention layers in the same KV cache group must be "
+            "FullAttentionSpec.")
+
         sliding_window = set(spec.sliding_window for spec in specs
                              if spec.sliding_window is not None)
         attention_chunk_size = set(spec.attention_chunk_size for spec in specs
                                    if spec.attention_chunk_size is not None)
-
-        merged_spec.sliding_window = cls.merge_window_sizes(sliding_window)
-        merged_spec.attention_chunk_size = (
-            cls.merge_window_sizes(attention_chunk_size))
+        merged_spec = cls(
+            block_size=specs[0].block_size,
+            num_kv_heads=specs[0].num_kv_heads,
+            head_size=specs[0].head_size,
+            dtype=specs[0].dtype,
+            use_mla=specs[0].use_mla,
+            sliding_window=cls.merge_window_sizes(sliding_window),
+            attention_chunk_size=cls.merge_window_sizes(attention_chunk_size),
+        )
+        for spec in specs:
+            for f in fields(AttentionSpec):
+                assert getattr(spec, f.name) == getattr(merged_spec, f.name), (
+                    "All attention layers in the same KV cache group must have "
+                    "the same attention spec.")
         assert (
             (merged_spec.sliding_window is not None) +
             (merged_spec.attention_chunk_size is not None) <= 1
@@ -140,16 +134,10 @@ class FullAttentionSpec(AttentionSpec):
         return merged_spec
 
 
-@dataclass
+@dataclass(frozen=True)
 class ChunkedLocalAttentionSpec(AttentionSpec):
     attention_chunk_size: int
 
-    @property
-    def type_id(self) -> str:
-        return (
-            f"local_attention_{self.attention_chunk_size}_{self.block_size}_{self.page_size_bytes}"
-        )  # noqa
-
     def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
         max_model_len = vllm_config.model_config.max_model_len
         max_num_batched_tokens = (
@@ -165,17 +153,13 @@ class ChunkedLocalAttentionSpec(AttentionSpec):
         return cdiv(num_tokens, self.block_size) * self.page_size_bytes
 
 
-@dataclass
+@dataclass(frozen=True)
 class SlidingWindowSpec(AttentionSpec):
     sliding_window: int
 
     def __post_init__(self):
         assert not self.use_mla, "MLA is not supported for sliding window"
 
-    @property
-    def type_id(self) -> str:
-        return f"sliding_window_{self.sliding_window}_{self.block_size}_{self.page_size_bytes}"  # noqa
-
     def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
         max_model_len = vllm_config.model_config.max_model_len
         max_num_batched_tokens = (
@@ -195,23 +179,17 @@ class SlidingWindowSpec(AttentionSpec):
         return (cdiv(num_tokens, self.block_size) + 1) * self.page_size_bytes
 
 
-@dataclass
+@dataclass(frozen=True)
 class MambaSpec(KVCacheSpec):
     shapes: tuple[tuple[int, ...], ...]
     dtype: torch.dtype
     page_size_padded: Optional[int] = None
     mamba_type: str = "mamba2"
 
-    def __post_init__(self):
-        self.num_elements = sum(prod(shape) for shape in self.shapes)
-
-    @property
-    def type_id(self) -> str:
-        return f"mamba_{self.shapes}_{self.dtype}_{self.mamba_type}"
-
     @property
     def page_size_bytes(self) -> int:
-        page_size = self.num_elements * get_dtype_size(self.dtype)
+        num_elements = sum(prod(shape) for shape in self.shapes)
+        page_size = num_elements * get_dtype_size(self.dtype)
         if self.page_size_padded is not None:
             assert self.page_size_padded >= page_size
             return self.page_size_padded

From ab714131e4a83469e8bebaf456853aa73b51324d Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Tue, 29 Jul 2025 21:29:51 +0800
Subject: [PATCH 014/224] [Doc] Update compatibility matrix for pooling and
 multimodal models (#21831)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 docs/features/compatibility_matrix.md | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/features/compatibility_matrix.md b/docs/features/compatibility_matrix.md
index 8be1585f8e76b..259a447984cb0 100644
--- a/docs/features/compatibility_matrix.md
+++ b/docs/features/compatibility_matrix.md
@@ -34,23 +34,25 @@ th:not(:first-child) {
 }
 </style>
 
-| Feature | [CP][chunked-prefill] | [APC](automatic_prefix_caching.md) | [LoRA](lora.md) | [SD](spec_decode.md) | CUDA graph | <abbr title="Pooling Models">pooling</abbr> | <abbr title="Encoder-Decoder Models">enc-dec</abbr> | <abbr title="Logprobs">logP</abbr> | <abbr title="Prompt Logprobs">prmpt logP</abbr> | <abbr title="Async Output Processing">async output</abbr> | multi-step | <abbr title="Multimodal Inputs">mm</abbr> | best-of | beam-search |
+| Feature | [CP][chunked-prefill] | [APC](automatic_prefix_caching.md) | [LoRA](lora.md) | [SD](spec_decode.md) | CUDA graph | [pooling](../models/pooling_models.md) | <abbr title="Encoder-Decoder Models">enc-dec</abbr> | <abbr title="Logprobs">logP</abbr> | <abbr title="Prompt Logprobs">prmpt logP</abbr> | <abbr title="Async Output Processing">async output</abbr> | multi-step | <abbr title="Multimodal Inputs">mm</abbr> | best-of | beam-search |
 |---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
 | [CP][chunked-prefill] | ✅ | | | | | | | | | | | | | | |
 | [APC](automatic_prefix_caching.md) | ✅ | ✅ | | | | | | | | | | | | | |
 | [LoRA](lora.md) | ✅ | ✅ | ✅ | | | | | | | | | | | | |
 | [SD](spec_decode.md) | ✅ | ✅ | ❌ | ✅ | | | | | | | | | | |
 | CUDA graph | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | | |
-| <abbr title="Pooling Models">pooling</abbr> | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | | | | | | | | |
+| [pooling](../models/pooling_models.md) | ✅\* | ✅\* | ✅ | ❌ | ✅ | ✅ | | | | | | | | |
 | <abbr title="Encoder-Decoder Models">enc-dec</abbr> | ❌ | [❌](gh-issue:7366) | ❌ | [❌](gh-issue:7366) | ✅ | ✅ | ✅ | | | | | | | |
 | <abbr title="Logprobs">logP</abbr> | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | | | | | | |
 | <abbr title="Prompt Logprobs">prmpt logP</abbr> | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | | | | | |
 | <abbr title="Async Output Processing">async output</abbr> | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | | | | |
 | multi-step | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | | | |
-| <abbr title="Multimodal Inputs">mm</abbr> | ✅ | [🟠](gh-pr:8348) | [🟠](gh-pr:4194) | ❔ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | | |
+| [mm](multimodal_inputs.md) | ✅ | ✅ | [🟠](gh-pr:4194) | ❔ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | | |
 | best-of | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ✅ | ✅ | |
 | beam-search | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ❔ | ✅ | ✅ |
 
+\* Chunked prefill and prefix caching are only applicable to last-token pooling.
+
 [](){ #feature-x-hardware }
 
 ## Feature x Hardware
@@ -62,9 +64,9 @@ th:not(:first-child) {
 | [LoRA](lora.md)                                           | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     | ✅ |
 | [SD](spec_decode.md)                                      | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     | ❌ |
 | CUDA graph                                                | ✅                  | ✅        | ✅        | ✅     | ✅        | ❌                  | ✅     | ❌ |
-| <abbr title="Pooling Models">pooling</abbr>               | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ❔     | ❌ |
+| [pooling](../models/pooling_models.md)                    | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     | ❌ |
 | <abbr title="Encoder-Decoder Models">enc-dec</abbr>       | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ❌     | ❌ |
-| <abbr title="Multimodal Inputs">mm</abbr>                 | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     | ❌ |
+| [mm](multimodal_inputs.md)                                | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     | ❌ |
 | <abbr title="Logprobs">logP</abbr>                        | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     | ❌ |
 | <abbr title="Prompt Logprobs">prmpt logP</abbr>           | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     | ❌ |
 | <abbr title="Async Output Processing">async output</abbr> | ✅                  | ✅        | ✅        | ✅     | ✅        | ❌                  | ❌     | ❌ |

From 04e38500eeaa683f107fc16011aee65981afc6cd Mon Sep 17 00:00:00 2001
From: Richard Zou <zou3519@users.noreply.github.com>
Date: Tue, 29 Jul 2025 09:35:58 -0400
Subject: [PATCH 015/224] [Bugfix] VLLM_V1 supports passing other compilation
 levels (#19340)

Signed-off-by: Richard Zou <zou3519@gmail.com>
---
 tests/compile/test_config.py       | 55 ++++++++++++++++++++++++++++--
 vllm/compilation/counter.py        |  2 ++
 vllm/config.py                     | 21 ++++++++++--
 vllm/v1/worker/gpu_model_runner.py | 13 ++++++-
 vllm/worker/model_runner.py        |  2 ++
 5 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/tests/compile/test_config.py b/tests/compile/test_config.py
index 0ba59f4b5a056..90e8e0ff95858 100644
--- a/tests/compile/test_config.py
+++ b/tests/compile/test_config.py
@@ -26,6 +26,8 @@ def test_use_cudagraphs_dynamic(monkeypatch):
     assert not vllm_config.compilation_config.use_cudagraph
 
 
+# forked is needed to work around https://github.com/vllm-project/vllm/issues/21073
+@pytest.mark.forked
 # NB: We don't test VLLM_DISABLE_COMPILE_CACHE=0 because that depends
 # on the state of the cache directory on the current machine, which
 # may be influenced by other tests.
@@ -33,8 +35,8 @@ def test_use_cudagraphs_dynamic(monkeypatch):
 def test_VLLM_DISABLE_COMPILE_CACHE(vllm_runner, monkeypatch, val):
     assert vllm.envs.VLLM_USE_V1
 
-    # spawn means that the counters are in the same process.
-    monkeypatch.setenv('VLLM_WORKER_MULTIPROC_METHOD', "spawn")
+    # Disable multiprocessing so that the counter is in the same process
+    monkeypatch.setenv('VLLM_ENABLE_V1_MULTIPROCESSING', '0')
     monkeypatch.setenv('VLLM_DISABLE_COMPILE_CACHE', val)
 
     compilation_config = {
@@ -50,6 +52,8 @@ def test_VLLM_DISABLE_COMPILE_CACHE(vllm_runner, monkeypatch, val):
         pass
 
 
+# forked is needed to work around https://github.com/vllm-project/vllm/issues/21073
+@pytest.mark.forked
 @pytest.mark.parametrize("enabled", [True, False])
 def test_use_cudagraphs(vllm_runner, monkeypatch, enabled):
     assert vllm.envs.VLLM_USE_V1
@@ -72,3 +76,50 @@ def test_use_cudagraphs(vllm_runner, monkeypatch, enabled):
                         compilation_config=compilation_config,
                         gpu_memory_utilization=0.4) as _):
         pass
+
+
+# forked is needed to work around https://github.com/vllm-project/vllm/issues/21073
+@pytest.mark.forked
+def test_dynamo_as_is(vllm_runner, monkeypatch):
+    # Disable multiprocessing so that the counter is in the same process
+    monkeypatch.setenv('VLLM_ENABLE_V1_MULTIPROCESSING', '0')
+
+    with (
+            compilation_counter.expect(dynamo_as_is_count=1),
+            # loading the model causes compilation (if enabled) to happen
+            vllm_runner('facebook/opt-125m',
+                        compilation_config={"level": 1},
+                        gpu_memory_utilization=0.4) as _):
+        pass
+
+
+# forked is needed to work around https://github.com/vllm-project/vllm/issues/21073
+@pytest.mark.forked
+def test_no_compilation(vllm_runner, monkeypatch):
+    # Disable multiprocessing so that the counter is in the same process
+    monkeypatch.setenv('VLLM_ENABLE_V1_MULTIPROCESSING', '0')
+
+    with (
+            compilation_counter.expect(num_graphs_seen=0,
+                                       dynamo_as_is_count=0),
+            # loading the model causes compilation (if enabled) to happen
+            vllm_runner('facebook/opt-125m',
+                        compilation_config={"level": 0},
+                        gpu_memory_utilization=0.4) as _):
+        pass
+
+
+# forked is needed to work around https://github.com/vllm-project/vllm/issues/21073
+@pytest.mark.forked
+def test_enforce_eager(vllm_runner, monkeypatch):
+    # Disable multiprocessing so that the counter is in the same process
+    monkeypatch.setenv('VLLM_ENABLE_V1_MULTIPROCESSING', '0')
+
+    with (
+            compilation_counter.expect(num_graphs_seen=0,
+                                       dynamo_as_is_count=0),
+            # loading the model causes compilation (if enabled) to happen
+            vllm_runner('facebook/opt-125m',
+                        enforce_eager=True,
+                        gpu_memory_utilization=0.4) as _):
+        pass
diff --git a/vllm/compilation/counter.py b/vllm/compilation/counter.py
index 6acb8abb3deb1..e01dd3915a3a1 100644
--- a/vllm/compilation/counter.py
+++ b/vllm/compilation/counter.py
@@ -27,6 +27,8 @@ class CompilationCounter:
     num_cache_entries_updated: int = 0
     # The number of standalone_compile compiled artifacts saved
     num_compiled_artifacts_saved: int = 0
+    # Number of times a model was loaded with CompilationLevel.DYNAMO_AS_IS
+    dynamo_as_is_count: int = 0
 
     def clone(self) -> "CompilationCounter":
         return copy.deepcopy(self)
diff --git a/vllm/config.py b/vllm/config.py
index 7ae615f477057..86c3b9eae64cb 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -4106,9 +4106,11 @@ class CompilationConfig:
         certain small batchsizes, where inductor is good at optimizing.
     """
     # Top-level Compilation control
-    level: int = 0
+    level: Optional[int] = None
     """The level of compilation:
 
+    - None: If None, we will select the default compilation level.
+      For V1 engine this is 3 (0 with enforce_eager); for V0 engine this is 0.
     - 0: no compilation.
     - 1: dynamo as is.
     - 2: dynamo once.
@@ -4664,6 +4666,22 @@ class VllmConfig:
                 "To workaround this limitation, vLLM will set 'ieee' input "
                 "precision for chunked prefill triton kernels.")
 
+        # If the user does not explicitly set a compilation level, then
+        # we use the default level. The default level depends on other
+        # settings (see the code below).
+        if self.compilation_config.level is None:
+            if envs.VLLM_USE_V1:
+                if (self.model_config is not None
+                        and not self.model_config.enforce_eager):
+                    self.compilation_config.level = CompilationLevel.PIECEWISE
+                else:
+                    self.compilation_config.level = \
+                            CompilationLevel.NO_COMPILATION
+            else:
+                # NB: Passing both --enforce-eager and a compilation level
+                # in V0 means the compilation level wins out.
+                self.compilation_config.level = CompilationLevel.NO_COMPILATION
+
         # async tp is built on top of sequence parallelism
         # and requires it to be enabled.
         if self.compilation_config.pass_config.enable_async_tp:
@@ -4676,7 +4694,6 @@ class VllmConfig:
             # By default, V1 uses piecewise CUDA graphs. If full_cuda_graph
             # is set to True, full CUDA graphs will be used.
             self.compilation_config.cudagraph_num_of_warmups = 1
-            self.compilation_config.level = CompilationLevel.PIECEWISE
             self.compilation_config.set_splitting_ops_for_v1()
 
         self._set_cudagraph_sizes()
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index fc55d09fc97e7..84ad582c9c9de 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -43,7 +43,7 @@ from vllm.sequence import IntermediateTensors, PoolerOutput
 from vllm.tasks import GenerationTask, PoolingTask, SupportedTask
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
                         GiB_bytes, LazyLoader, check_use_alibi, get_dtype_size,
-                        is_pin_memory_available, round_up)
+                        is_pin_memory_available, round_up, supports_dynamo)
 from vllm.v1.attention.backends.mamba_selectors import get_mamba_attn_backend
 from vllm.v1.attention.backends.utils import (
     AttentionMetadataBuilder, CommonAttentionMetadata,
@@ -1930,6 +1930,17 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 rank_mapping,
             )
 
+        if (
+            self.vllm_config.compilation_config.level == \
+                CompilationLevel.DYNAMO_AS_IS and supports_dynamo()
+        ):
+            backend = self.vllm_config.compilation_config.init_backend(
+                self.vllm_config)
+            compilation_counter.dynamo_as_is_count += 1
+            self.model.compile(
+                fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
+                backend=backend)
+
     def reload_weights(self) -> None:
         assert getattr(self, "model", None) is not None, \
             "Cannot reload weights before model is loaded."
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index 5a185e7451ade..20b9b733cd3b9 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -22,6 +22,7 @@ import vllm.envs as envs
 from vllm.attention import AttentionMetadata, get_attn_backend
 from vllm.attention.backends.abstract import AttentionState
 from vllm.attention.backends.utils import CommonAttentionState
+from vllm.compilation.counter import compilation_counter
 from vllm.config import CompilationLevel, VllmConfig
 from vllm.core.scheduler import SchedulerOutputs
 from vllm.distributed import broadcast_tensor_dict, get_pp_group
@@ -1121,6 +1122,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
             CompilationLevel.DYNAMO_AS_IS and supports_dynamo():
             backend = self.vllm_config.compilation_config.init_backend(
                 self.vllm_config)
+            compilation_counter.dynamo_as_is_count += 1
             self.model = torch.compile(
                 self.model,
                 fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,

From f693b067a28768e16534cfd49672c020c41071b0 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 29 Jul 2025 15:22:50 +0100
Subject: [PATCH 016/224] [Docs] Merge design docs for a V1 only future
 (#21832)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 docs/.nav.yml                               |  4 +-
 docs/design/automatic_prefix_caching.md     | 40 ---------------
 docs/design/huggingface_integration.md      |  2 +-
 docs/design/{v1 => }/metrics.md             |  0
 docs/design/{v1 => }/multiprocessing.md     |  0
 docs/design/{v1 => }/p2p_nccl_connector.md  | 56 +++++++++++----------
 docs/design/{kernel => }/paged_attention.md |  4 ++
 docs/design/{v1 => }/prefix_caching.md      |  0
 docs/design/{v1 => }/torch_compile.md       |  0
 9 files changed, 35 insertions(+), 71 deletions(-)
 delete mode 100644 docs/design/automatic_prefix_caching.md
 rename docs/design/{v1 => }/metrics.md (100%)
 rename docs/design/{v1 => }/multiprocessing.md (100%)
 rename docs/design/{v1 => }/p2p_nccl_connector.md (95%)
 rename docs/design/{kernel => }/paged_attention.md (99%)
 rename docs/design/{v1 => }/prefix_caching.md (100%)
 rename docs/design/{v1 => }/torch_compile.md (100%)

diff --git a/docs/.nav.yml b/docs/.nav.yml
index ab54dc3e535bd..ad742be3d6947 100644
--- a/docs/.nav.yml
+++ b/docs/.nav.yml
@@ -56,9 +56,7 @@ nav:
       - contributing/model/tests.md
       - contributing/model/multimodal.md
     - CI: contributing/ci
-    - Design Documents:
-      - V0: design
-      - V1: design/v1
+    - Design Documents: design
   - API Reference:
     - Summary: api/README.md
     - Contents:
diff --git a/docs/design/automatic_prefix_caching.md b/docs/design/automatic_prefix_caching.md
deleted file mode 100644
index 60e21f6ad0fcb..0000000000000
--- a/docs/design/automatic_prefix_caching.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Automatic Prefix Caching
-
-The core idea of [PagedAttention](https://blog.vllm.ai/2023/06/20/vllm.html) is to partition the KV cache of each request into KV Blocks. Each block contains the attention keys and values for a fixed number of tokens. The PagedAttention algorithm allows these blocks to be stored in non-contiguous physical memory so that we can eliminate memory fragmentation by allocating the memory on demand.
-
-To automatically cache the KV cache, we utilize the following key observation: Each KV block can be uniquely identified by the tokens within the block and the tokens in the prefix before the block.
-
-```text
-                    Block 1                  Block 2                  Block 3
-         [A gentle breeze stirred] [the leaves as children] [laughed in the distance]
-Block 1: |<--- block tokens ---->|
-Block 2: |<------- prefix ------>| |<--- block tokens --->|
-Block 3: |<------------------ prefix -------------------->| |<--- block tokens ---->|
-```
-
-In the example above, the KV cache in the first block can be uniquely identified with the tokens “A gentle breeze stirred”. The third block can be uniquely identified with the tokens in the block “laughed in the distance”, along with the prefix tokens “A gentle breeze stirred the leaves as children”. Therefore, we can build the following one-to-one mapping:
-
-```text
-hash(prefix tokens + block tokens) <--> KV Block
-```
-
-With this mapping, we can add another indirection in vLLM’s KV cache management. Previously, each sequence in vLLM maintained a mapping from their logical KV blocks to physical blocks. To achieve automatic caching of KV blocks, we map the logical KV blocks to their hash value and maintain a global hash table of all the physical blocks. In this way, all the KV blocks sharing the same hash value (e.g., shared prefix blocks across two requests) can be mapped to the same physical block and share the memory space.
-
-This design achieves automatic prefix caching without the need of maintaining a tree structure among the KV blocks. More specifically, all of the blocks are independent of each other and can be allocated and freed by itself, which enables us to manages the KV cache as ordinary caches in operating system.
-
-## Generalized Caching Policy
-
-Keeping all the KV blocks in a hash table enables vLLM to cache KV blocks from earlier requests to save memory and accelerate the computation of future requests. For example, if a new request shares the system prompt with the previous request, the KV cache of the shared prompt can directly be used for the new request without recomputation. However, the total KV cache space is limited and we have to decide which KV blocks to keep or evict when the cache is full.
-
-Managing KV cache with a hash table allows us to implement flexible caching policies. As an example, in current vLLM, we implement the following eviction policy:
-
-* When there are no free blocks left, we will evict a KV block with reference count (i.e., number of current requests using the block) equals 0.
-* If there are multiple blocks with reference count equals to 0, we prioritize to evict the least recently used block (LRU).
-* If there are multiple blocks whose last access time are the same, we prioritize the eviction of the block that is at the end of the longest prefix (i.e., has the maximum number of blocks before it).
-
-Note that this eviction policy effectively implements the exact policy as in [RadixAttention](https://lmsys.org/blog/2024-01-17-sglang/) when applied to models with full attention, which prioritizes to evict reference count zero and least recent used leaf nodes in the prefix tree.
-
-However, the hash-based KV cache management gives us the flexibility to handle more complicated serving scenarios and implement more complicated eviction policies beyond the policy above:
-
-* Multi-LoRA serving. When serving requests for multiple LoRA adapters, we can simply let the hash of each KV block to also include the LoRA ID the request is querying for to enable caching for all adapters. In this way, we can jointly manage the KV blocks for different adapters, which simplifies the system implementation and improves the global cache hit rate and efficiency.
-* Multi-modal models. When the user input includes more than just discrete tokens, we can use different hashing methods to handle the caching of inputs of different modalities. For example, perceptual hashing for images to cache similar input images.
diff --git a/docs/design/huggingface_integration.md b/docs/design/huggingface_integration.md
index 7b01313ddb00a..5a7582c86d49f 100644
--- a/docs/design/huggingface_integration.md
+++ b/docs/design/huggingface_integration.md
@@ -1,4 +1,4 @@
-# Integration with HuggingFace
+# Integration with Hugging Face
 
 This document describes how vLLM integrates with HuggingFace libraries. We will explain step by step what happens under the hood when we run `vllm serve`.
 
diff --git a/docs/design/v1/metrics.md b/docs/design/metrics.md
similarity index 100%
rename from docs/design/v1/metrics.md
rename to docs/design/metrics.md
diff --git a/docs/design/v1/multiprocessing.md b/docs/design/multiprocessing.md
similarity index 100%
rename from docs/design/v1/multiprocessing.md
rename to docs/design/multiprocessing.md
diff --git a/docs/design/v1/p2p_nccl_connector.md b/docs/design/p2p_nccl_connector.md
similarity index 95%
rename from docs/design/v1/p2p_nccl_connector.md
rename to docs/design/p2p_nccl_connector.md
index 9d334f8873d97..082dff15ef2c8 100644
--- a/docs/design/v1/p2p_nccl_connector.md
+++ b/docs/design/p2p_nccl_connector.md
@@ -1,8 +1,10 @@
+# P2P NCCL Connector
+
 An implementation of xPyD with dynamic scaling based on point-to-point communication, partly inspired by Dynamo.
 
-# Detailed Design
+## Detailed Design
 
-## Overall Process
+### Overall Process
 As shown in Figure 1, the overall process of this **PD disaggregation** solution is described through a request flow:
 
 1. The client sends an HTTP request to the Proxy/Router's `/v1/completions` interface.
@@ -15,7 +17,7 @@ As shown in Figure 1, the overall process of this **PD disaggregation** solution
 
 ![image1](https://github.com/user-attachments/assets/fb01bde6-755b-49f7-ad45-48a94b1e10a7)
 
-## Proxy/Router (Demo)
+### Proxy/Router (Demo)
 
 A simple HTTP service acts as the entry point for client requests and starts a background thread to listen for P/D instances reporting their HTTP IP and PORT, as well as ZMQ IP and PORT. It maintains a dictionary of `http_addr -> zmq_addr`. The `http_addr` is the IP:PORT for the vLLM instance's request, while the `zmq_addr` is the address for KV cache handshake and metadata reception.
 
@@ -29,13 +31,13 @@ Currently, to quickly verify whether xPyD can work, a round-robin selection of 1
 
 Each P/D instance periodically sends a heartbeat packet to the Proxy/Router (currently every 3 seconds) to register (i.e., report `http_addr -> zmq_addr`) and keep the connection alive. If an instance crashes and fails to send a ping for a certain period of time, the Proxy/Router will remove the timed-out instance (this feature has not yet been developed).
 
-## KV Cache Transfer Methods
+### KV Cache Transfer Methods
 
 There are three methods for KVCache transfer: PUT, GET, and PUT_ASYNC. These methods can be specified using the `--kv-transfer-config` and `kv_connector_extra_config` parameters, specifically through the `send_type` field. Both PUT and PUT_ASYNC involve the P instance actively sending KVCache to the D instance. The difference is that PUT is a synchronous transfer method that blocks the main process, while PUT_ASYNC is an asynchronous transfer method. PUT_ASYNC uses a dedicated thread for sending KVCache, which means it does not block the main process. In contrast, the GET method involves the P instance saving the KVCache to the memory buffer after computing the prefill. The D instance then actively retrieves the computed KVCache from the P instance once it has allocated space for the KVCache.
 
 Experimental results have shown that the performance of these methods, from highest to lowest, is as follows: PUT_ASYNC → GET → PUT.
 
-## P2P Communication via ZMQ & NCCL
+### P2P Communication via ZMQ & NCCL
 
 As long as the address of the counterpart is known, point-to-point KV cache transfer (using NCCL) can be performed, without being constrained by rank and world size. To support dynamic scaling (expansion and contraction) of instances with PD disaggregation. This means that adding or removing P/D instances does not require a full system restart.
 
@@ -43,7 +45,7 @@ Each P/D instance only needs to create a single `P2pNcclEngine` instance. This i
 
 When a P instance and a D instance transmit KVCache for the first time, they need to establish a ZMQ connection and an NCCL group. For subsequent KVCache transmissions, this ZMQ connection and NCCL group are reused. The NCCL group consists of only two ranks, meaning the world size is equal to 2. This design is intended to support dynamic scaling, which means that adding or removing P/D instances does not require a full system restart. As long as the address of the counterpart is known, point-to-point KVCache transmission can be performed, without being restricted by rank or world size.
 
-## NCCL Group Topology
+### NCCL Group Topology
 
 Currently, only symmetric TP (Tensor Parallelism) methods are supported for KVCache transmission. Asymmetric TP and PP (Pipeline Parallelism) methods will be supported in the future. Figure 2 illustrates the 1P2D setup, where each instance has a TP (Tensor Parallelism) degree of 2. There are a total of 7 NCCL groups: three vLLM instances each have one NCCL group with TP=2. Additionally, the 0th GPU card of the P instance establishes an NCCL group with the 0th GPU card of each D instance. Similarly, the 1st GPU card of the P instance establishes an NCCL group with the 1st GPU card of each D instance.
 
@@ -51,7 +53,7 @@ Currently, only symmetric TP (Tensor Parallelism) methods are supported for KVCa
 
 Each NCCL group occupies a certain amount of GPU memory buffer for communication, the size of which is primarily influenced by the `NCCL_MAX_NCHANNELS` environment variable. When `NCCL_MAX_NCHANNELS=16`, an NCCL group typically occupies 100MB, while when `NCCL_MAX_NCHANNELS=8`, it usually takes up 52MB. For large-scale xPyD configurations—such as DeepSeek's 96P144D—this implementation is currently not feasible. Moving forward, we are considering using RDMA for point-to-point communication and are also keeping an eye on UCCL.
 
-## GPU Memory Buffer and Tensor Memory Pool
+### GPU Memory Buffer and Tensor Memory Pool
 
 The trade-off in the size of the memory buffer is as follows: For P instances, the memory buffer is not required in PUT and PUT_ASYNC modes, but it is necessary in GET mode. For D instances, a memory buffer is needed in all three modes. The memory buffer for D instances should not be too large. Similarly, for P instances in GET mode, the memory buffer should also not be too large. The memory buffer of D instances is used to temporarily store KVCache sent by P instances. If it is too large, it will reduce the KVCache space available for normal inference by D instances, thereby decreasing the inference batch size and ultimately leading to a reduction in output throughput. The size of the memory buffer is configured by the parameter `kv_buffer_size`, measured in bytes, and is typically set to 5%～10% of the memory size.
 
@@ -59,15 +61,15 @@ If the `--max-num-seqs` parameter for P instances is set to a large value, due t
 
 To address the above issues, I have designed and developed a local Tensor memory pool for storing KVCache, inspired by the buddy system used in Linux memory modules. Since the memory is sufficiently large, typically in the TB range on servers, there is no need to consider prefix caching or using block-based designs to reuse memory, thereby saving space. When the memory buffer is insufficient, KVCache can be directly stored in the Tensor memory pool, and D instances can subsequently retrieve KVCache from it. The read and write speed is that of PCIe, with PCIe 4.0 having a speed of approximately 21 GB/s, which is usually faster than the Prefill speed. Otherwise, solutions like Mooncake and lmcache would not be necessary. The Tensor memory pool acts as a flood diversion area, typically unused except during sudden traffic surges. In the worst-case scenario, my solution performs no worse than the normal situation with a Cache store.
 
-# Install vLLM
+## Install vLLM
 
 ```shell
 pip install "vllm>=0.9.2"
 ```
 
-# Run xPyD
+## Run xPyD
 
-## Instructions
+### Instructions
 - The following examples are run on an A800 (80GB) device, using the Meta-Llama-3.1-8B-Instruct model.
 - Pay attention to the setting of the `kv_buffer_size` (in bytes). The empirical value is 10% of the GPU memory size. This is related to the kvcache size. If it is too small, the GPU memory buffer for temporarily storing the received kvcache will overflow, causing the kvcache to be stored in the tensor memory pool, which increases latency. If it is too large, the kvcache available for inference will be reduced, leading to a smaller batch size and decreased throughput.
 - For Prefill instances, when using non-GET mode, the `kv_buffer_size` can be set to 1, as Prefill currently does not need to receive kvcache. However, when using GET mode, a larger `kv_buffer_size` is required because it needs to store the kvcache sent to the D instance.
@@ -79,16 +81,16 @@ pip install "vllm>=0.9.2"
 - Supports multiple nodes; you just need to modify the `proxy_ip` and `proxy_port` in `--kv-transfer-config`.
 - In the following examples, it is assumed that **the proxy's IP is 10.0.1.1**.
 
-## Run 1P3D
+### Run 1P3D
 
-### Proxy (e.g. 10.0.1.1)
+#### Proxy (e.g. 10.0.1.1)
 
 ```shell
 cd {your vllm directory}/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/
 python3 disagg_proxy_p2p_nccl_xpyd.py &
 ```
 
-### Prefill1 (e.g. 10.0.1.2 or 10.0.1.1)
+#### Prefill1 (e.g. 10.0.1.2 or 10.0.1.1)
 
 ??? console "Command"
 
@@ -110,7 +112,7 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_buffer_size":"1e1","kv_port":"21001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20001"}}' > /var/vllm.log 2>&1 &
     ```
 
-### Decode1 (e.g. 10.0.1.3 or 10.0.1.1)
+#### Decode1 (e.g. 10.0.1.3 or 10.0.1.1)
 
 ??? console "Command"
 
@@ -132,7 +134,7 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_buffer_size":"8e9","kv_port":"22001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20002"}}' > /var/vllm.log 2>&1 &
     ```
 
-### Decode2 (e.g. 10.0.1.4 or 10.0.1.1)
+#### Decode2 (e.g. 10.0.1.4 or 10.0.1.1)
 
 ??? console "Command"
 
@@ -154,7 +156,7 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_buffer_size":"8e9","kv_port":"23001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20003"}}' > /var/vllm.log 2>&1 &
     ```
 
-### Decode3 (e.g. 10.0.1.5 or 10.0.1.1)
+#### Decode3 (e.g. 10.0.1.5 or 10.0.1.1)
 
 ??? console "Command"
 
@@ -176,16 +178,16 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_buffer_size":"8e9","kv_port":"24001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20004"}}' > /var/vllm.log 2>&1 &
     ```
 
-## Run 3P1D
+### Run 3P1D
 
-### Proxy (e.g. 10.0.1.1)
+#### Proxy (e.g. 10.0.1.1)
 
 ```shell
 cd {your vllm directory}/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/
 python3 disagg_proxy_p2p_nccl_xpyd.py &
 ```
 
-### Prefill1 (e.g. 10.0.1.2 or 10.0.1.1)
+#### Prefill1 (e.g. 10.0.1.2 or 10.0.1.1)
 
 ??? console "Command"
 
@@ -207,7 +209,7 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_buffer_size":"1e1","kv_port":"21001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20001"}}' > /var/vllm.log 2>&1 &
     ```
 
-### Prefill2 (e.g. 10.0.1.3 or 10.0.1.1)
+#### Prefill2 (e.g. 10.0.1.3 or 10.0.1.1)
 
 ??? console "Command"
 
@@ -229,7 +231,7 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_buffer_size":"1e1","kv_port":"22001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20002"}}' > /var/vllm.log 2>&1 &
     ```
 
-### Prefill3 (e.g. 10.0.1.4 or 10.0.1.1)
+#### Prefill3 (e.g. 10.0.1.4 or 10.0.1.1)
 
 ??? console "Command"
 
@@ -251,7 +253,7 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_buffer_size":"1e1","kv_port":"23001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20003"}}' > /var/vllm.log 2>&1 &
     ```
 
-### Decode1 (e.g. 10.0.1.5 or 10.0.1.1)
+#### Decode1 (e.g. 10.0.1.5 or 10.0.1.1)
 
 ??? console "Command"
 
@@ -273,7 +275,7 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_buffer_size":"8e9","kv_port":"24001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20004"}}' > /var/vllm.log 2>&1 &
     ```
 
-# Single request
+## Single request
 
 ```shell
 curl -X POST -s http://10.0.1.1:10001/v1/completions \
@@ -286,7 +288,7 @@ curl -X POST -s http://10.0.1.1:10001/v1/completions \
 }'
 ```
 
-# Benchmark
+## Benchmark
 
 ??? console "Command"
 
@@ -310,14 +312,14 @@ curl -X POST -s http://10.0.1.1:10001/v1/completions \
         --num-prompts 1000
     ```
 
-# Shut down
+## Shut down
 
 ```shell
 pgrep python | xargs kill -9 && pkill -f python
 ```
 
-# Test data
+## Test data
 
-## **Scenario**: 1K input & 200 output tokens, E2E P99 latency ~2s
+### **Scenario**: 1K input & 200 output tokens, E2E P99 latency ~2s
 
 ![testdata](https://github.com/user-attachments/assets/cef0953b-4567-4bf9-b940-405b92a28eb1)
diff --git a/docs/design/kernel/paged_attention.md b/docs/design/paged_attention.md
similarity index 99%
rename from docs/design/kernel/paged_attention.md
rename to docs/design/paged_attention.md
index 94bfa97ee2217..ef525e8c60412 100644
--- a/docs/design/kernel/paged_attention.md
+++ b/docs/design/paged_attention.md
@@ -1,5 +1,9 @@
 # vLLM Paged Attention
 
+!!! warning
+    This document is being kept in the vLLM documentation for historical purposes.
+    It no longer describes the code used in vLLM today.
+
 Currently, vLLM utilizes its own implementation of a multi-head query
 attention kernel (`csrc/attention/attention_kernels.cu`).
 This kernel is designed to be compatible with
diff --git a/docs/design/v1/prefix_caching.md b/docs/design/prefix_caching.md
similarity index 100%
rename from docs/design/v1/prefix_caching.md
rename to docs/design/prefix_caching.md
diff --git a/docs/design/v1/torch_compile.md b/docs/design/torch_compile.md
similarity index 100%
rename from docs/design/v1/torch_compile.md
rename to docs/design/torch_compile.md

From 759b87ef3e29da09f36b37046e8ff51196c09679 Mon Sep 17 00:00:00 2001
From: Brittany <24945384+bvrockwell@users.noreply.github.com>
Date: Tue, 29 Jul 2025 07:23:19 -0700
Subject: [PATCH 017/224] [TPU] Add an optimization doc on TPU (#21155)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 docs/assets/design/v1/tpu/most_model_len.png | Bin 0 -> 12126 bytes
 docs/configuration/tpu.md                    | 104 +++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 docs/assets/design/v1/tpu/most_model_len.png
 create mode 100644 docs/configuration/tpu.md

diff --git a/docs/assets/design/v1/tpu/most_model_len.png b/docs/assets/design/v1/tpu/most_model_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..344a81ed90801ee1a2ff1343f3609c8318c96f75
GIT binary patch
literal 12126
zcmds73p~_m_aD~emZDhORwAoha+}F5>o&x=MZ>sX8fKVb#*8uUAw{}ucnc{ZMGRq+
zyO3@w6^WF4wGtUp?za4&-<a98?_NId|MTD7fBUM&{Lb?{=l4A4d(L^z`F=WTyT?pK
z=xZSu3?^cZHo?GPJWTNYwZJOyNySDufL}a87&8>C<k6-P80^b1#%?DDCCrCJCcxy7
z#>?O25Nbqf5JL`WB8NcWXfzc(3Fk}4QG!$g2n-+s<2YYJ0B3?VHG)JY<Kz&g8Y*gF
zNZuOfO$uO8gNSlS6nHidU=YaQHxPqQ8$0mf1pcY3d8%u8YAJxPMl>3k;7IVYBmwQF
z8b~b_q$Uu{TcYi)t>h5K;5(V*PXNEn2;TlwXo)eAPNe{esX9_yMGg7`gmyR|9G$d6
z1+>!;0zHUC4Ol)50-6kTLjytHI5J^btiCMqqSEmM`mzY<L&za_%c&WGsnB1fF~<tv
zApi&7&9;s$FoZH^ISIck3sVmCCt7RzQT^>{c7awnFE1a;ziZ>_q=cE#aWtYe70<R2
zK8&*=4FpFiK73iCy%V{-NnbkmJ)9Ykz>XBbJqml{2sPhe5}pvm(YCyjL8X!zB-$^9
z-qe5qf;R_Bjz%1vP7VF#G#@INb2`o(8aVhbv_k^BUzrq=I+E}V;0us5X=`w{2g%U{
zk}r|_W_2~r2nENT%n<|;ad>Lz@_M#tB`V8y<XV$Xr2>5`KwCM?okR&UCXgYA<l2$*
z9^g0s`?Wt=gZ@)I{9n{L`XYB*SO@J^Q?|!AP_Wt<3elSDULiPgFvsM}cm^@TxeiRH
z1_$5?5JJFFQK3W<gJ4I)c|+4ffqwxRkwGDYCw1@@1UuQ7N~SJ*I#N>&r=zX~$*BR1
z6>=Z;50D2j=v05giV=i~mL`A;if$;7Xt<vb&#?yxItGF3%h0Sp2IXgH%m^T?8T4?V
zma`n8&M_z_5Y)9f&!IoN71yhXD?AF)$<9<9NA~?mGbH^_zy@<TzVgSrhtlT1xzums
z2;K|5`me%KO-&oeM&(y{si|pbu}6LnVUXPMB;deYj030vz;#euu?dL|Qp9o+CE%co
z;r|7B1UE&jKz=6|=Fb~?PVWAzp#O)3FoFx(N+3Dm`g!BH^RErze;STE|EZ~1<FmyP
zzarNLQG=mc2t}tdaGdn3^S52Q+!*Cty9n+byCP<}Q&z?-*ClYD&9$rj*<!uCvA?%`
zLi|Sgr1_7?y*j_bk%9~Jq(GgMCz%kiJi4ML1_dZosjMh`ab%J&)ChWmQU~huP*9ml
z0zHcnXMjS&<DruJH!J90*4Tdsc1RHI?2?gN`XYawZ+{Cr?zGP*2Dvr-9~D5L&!a&N
zi+3=c9FC&ny!`>$@Y_(V!1QNS*sTqjL_>4h95T*}K(?g@kvOT7zT9p81UhKUe=+_O
z_*^$36MUcz{H~L%NXyE9TVCGDH6_O<l>gJb%uUvxkyHG-A))}<>AyOpe=2=B@FBUe
zy5d^>hlKQh(9`^rL;7!vXas^&jdCk39nSL#m*58P%1VnH_<v<UBLVkn`a7%|d+Znd
z<w}}_J<4TFmj^ftAg+0t51JcM$cMqcN-;MvvJdlM^$C9eaC)hEa(46C<l<YcY08+V
zg7IT5YLpANJ@DCIo;`YD%>0`=v@^n@Qq1M(j>lG~GKQ#1c>F!h=cwHA7?12T-T~)Y
z^k&bMF4oL`+lagqoH;SPIIpk2Wq9c0{Qil0L38kPCl<ylAbsFK0<U0PO2S3dX;DKM
z58pYM%xx3si|9Lt1Ih10h;P1vW&!D@KS3&;$*1C8i+8^2W1U|V<jjXS43kM!zjM^X
z2Nl~}6Vwwr5apYy<hW;Y{A&$AJf)|y;!&n<{@mFYHxqgiH|vsT#-5+CJ?eTaln15Q
z?+csF4&Y^qFf}RNPN!AeF=N9S)CN*d6}2kK;FEUWc)YNzXbMb5B4r0_)z)`io-xjS
zwZX;ibs?JGcUHHgDxDVDq}}H8=p1Pvda+o8{H!fXKXPVl=828LnZngrrP;pF?D~b#
zqL8JJvpwEPdOn4=3sWQB32Qc&r76314>lbt4t#Y-Va8czZ=t<{YYjbbvo`Je!t`6;
z#TnLB(vwThlP_;mo?dni8O{vtzISSSv2SZuW!R)wOSVDdG^?kpHiR|W4YpJ?J=)ol
zIZzwyzi}6%fYM!l4~y9nJ(qK0v+mPRKYp<BA84o-`AY4&GQ2SRUvV9`k4t7YAA}3<
zp!Q!ImkKUUcAXvXqdZ7gA8)Nq@Q?SD{sMhbz(n2<y<6Z;%M-^zVAzlr<tnEd`jFfA
zR$1RZcPRB>9UOd-u%+_#!;Bs<T#PMpaHu3Z7kM@|Eb2y$7nL^9jdG5?8p+QNvBV!*
zB~o^Xe{J_!4gVdMX*(49-B{ffl#dJ3Dk<VSPHnR;+Z8<OG|^vAdG<Zdp~4TFfgl_c
zZWY<6`K(~?#j&yHR|A*<&&vo;bIsRFdJ*oY1X_l^YEBAeF_qy%#%uNP>$h2bA30vr
zoE0_W+~pf@ohy@PYj1dBlTJInGsl?UwZbn|*te!HxXWEL@TEzHK}>^nwtnBEYy&FL
zYB^vuG%Da`d?48?0xutHi!OG$a-%-hf6MDLzFAkE<lEF076Nx07paL^`lOOR^*TE_
zc^^1MU{kKv5OA00Qv2T-^;A+aiu^*xdq;R#E(s`$)U<<>z+Hl4FBV#)_EZPHa$8(T
z-ENcBl9M|l?xGX)nt{7?Ks#hCy)Sx^RT=oov<Q=TOjrSH4Hpup@e~-Ro=3eN&W=fx
z&&B7Q@0=*S!?<fI(RoBn)(y?<CbylB&D06HQ{G!}zF~2^4%xBk{MAW0L-g&gFyFa4
zk@$`h_qxlBk@gZlTbewLK^!UBKmWc2g-O-G-xbx3m@05Bw5yI&<j1NDNntf|Xw@EQ
z66vycb7IMR1x)Ud%{ug3@y8YAvA%Zsdn$w^6rWALYF74a*k7VEZ5*2`cc*F~x(A+=
z81reqS$S9?w`15ddVa{@wBw*+m@G5bzMkpnDCOtbk|V@S&hWVSZjG{*&)m@_5xffs
zx+iNWudiLPMJv!O(srEjLNia=9wC*=i}Q9b3dX0T!Lita3(x98OH<|dzN}5PPm<De
zi(e<C5T8AhiIS<<KTntB%g~}$TBItvud!B!``K8<ewZDIsiU<=r<E|1ZR<cN1hqu&
z+H+1Dm#}tAh~*LB@QV|T6NQy|mZxh!FvbQN2e6Lu!jd6T!Zl);Gam-be6A^>6p@L3
z+Zysa4yUAjD0D0xpUJTAyf*jp_)HW3mg=Yrt@?`(^sLSk^jRN&xcjf0EJtP;dc?Lf
zxIWpXDj>6G{S$iso5vU3VSW=<kthiz2S#?+p-q{)g6V6miRLyk6|@(qDzmAf)1I|*
zEdhPCHT(wp^FN$Ag0kpzdw5m@xSUr>p<O!2LmkT772O1xRzI$lT|e-(OzKg7Eo8ro
zAIicqAi_I(>aiK?xM~A!wQBv;$%ki<&3YRO_%}>-05j`tfSbRMI!6n5fl@4Q%(bwH
zEem0}%&NulVtDe5TaF5?#v&v38QFXqDZmz`QJ$CG7AFg(ZqEw4feqPZAFNP4HIHuG
zV?i0^S;wjvAwIqk|1d+V6$>`57~Xk$!?5sd$|PKX`JKVx-X;yc^4{PcN>z;(Q7@#}
zGa`BLzHw5bf8(~^z2(HcO7;b|g_*%8^de`RnhwHMC#>W5JdN&T+U563IlsTJte|jD
zna87=)}*&DkbuoEJ%o`68_MYdDTHN^{WP@fS$$-c>w?{Yj_BRHym9uXQhEzgNS=D8
zs!O>%IkK?9ucJWhamAF=HK~}S&YaSV?NUzF$r$Y2SVJEP>0HFyyL8VH#q&aDonrjF
z0w&u*8XInYE)WOC(!Ub8WqKg_*a_KU>33UrP{#yg4as~0%xBu|LcDR}L9kdDc!JrI
z5DoAtca<r3wP&Y}O@v?XYv!(^IfFcbkQUVit{jq9E<Vf~G3LgTXVz+*bpCDRY@qZ3
z!){=u@}5ASO~UeVsDy*&3Htd2efTN;r%6j~+0&K@4F=hXPSGFjH?8HVlG@JOCBkf5
zT5~G$n<f6Qx9{oQ#=9g|jUb9!y|}Sm+#yHQ@{?|^z^yvN_c6!?r=@?@y&u?nOh&@R
zacySu{^tjm1n(?%y!<3)2#b9KKPmaGOL$xMsZ#VCA)K)VPoCtZsm7ac>P&^9`<GWB
zPO@$7YkT$F)wPPbsJCy)bBHMRjoof(XtW!hcUr$|H*{@RUow0XexyiIp&?Tk$qxjE
z*ZFyd#AJqUqTBev6D+ofPe5TCoPGCK@y4Z885=jV6$vQ3UaknLc)|OV&VQJzsq!|3
z#$v0*Y^B_1d$!&_u}bS0G-)ssCNtER+$O`;m<-ATw?CmgSS~vT3zG|{{0>%So+$Tr
zHQly5La5Y?J%W}dqp`&c21&K)l_lS7lTt@1QbNwfgZ<2HHAK%N)=hl@op(?XxQ02q
zx*046+hn!1sYBZYQXR;U(x0>RI#wpNZC6vzqjvV5W9QZhY{6h(3CGs4(S2tt{rJ1&
z_cXUR>(~IE5yM!0!9E-nE}%e7IGiX09Z?kX0I~#(=W~|ch_}ykTz`#4E%08dYF$)m
zH-fet`;~y(^s84A7;Moe)m7H~PfWaCiuE863h8%9640TpEAgQ27ri)-WQWB)aO~G#
z{Gl`dwu^rbQk?Pqo_^D3>d_DO$L+he37qbomGn(4hB<9%^{rQ&y<bJxX2@Ujffo-~
zw<)n9Mat%;uVSCQUq}?wzeZ%UF%%sjJ$~`oOXEZosF53S&2y8JVh1Pb5Sqx?)sE(J
zAee77Uv)MKg*@N@$l?d~k-mh`mW}|aI>Ok6om~Xb2a<&Z(tJR|<l(y@DV=*kD;EwK
z<sZ2OWHUHJnVK=w{5Vo~Cu21a-%B?L{VP(Xw{FRrqHnvT&Om2-2EsYcp6s)(7`g{|
z*iW!0$F@P&#()1<oCx#T&dflU7tPJS=dN7H7myb9<aLa<-`}SndYqljuY&0K@TN@*
z@|H*7_SVjZ_3A@m^Jfl#ybP7!8+Q_uTch@12M{e$xuZVeV&FZ==I|0BW?USfrG>@)
zcU2kA<FYr`@@YYN*hB`DFR{%`c3?JvaBW&>-vuQrLvc_ViHDtIU#6m$L6dq}wODp}
z2@Q+ZAhuoC>vL?3{wROedcbZPpPl&NU5eL&1w9x#Wfc6z;S^p)*~X^zm=RrOHKbBh
zDfWBM2St90va32rJBzG&QpFafig@xiLkp0CAoA*;D-|$SOMUwc3=iwikOXa`R&UO^
zZ=P)zgQQEOpV=lkdl1^Q673=Nc*>6tJtZxqRx@j8@*Su~rLmILwYJmi{ZuQus>?FJ
zYe-7_+Q#D$n0_n6@cclXs<^iQ)5|q(sdA+cZm>4b+ZW&N!4F15fjYp)E5f8{73~v>
zmy9%Pu;yQYmd(l0ZW>!ul};2j_y5ateQZDE+xkMeuIOTyiu*<U>iaisT8M-6*BREv
z+xyJzc{`~eUL6CSh6qvu5!cNw3eW)LRG(dYOrRVn-~#f4OZp5uKeX>iR*}gwYnzWK
z*qO9p`&OVoP!@<Jv}$|h*%;UZ4D9!x`wqNCS~M46kgGyj#sG9<yKgt&W5Z?yU^5zR
z*Zgyq7+w-##w>m8no<JxGFo`Nto8tJY(htVcC=rrBDPF-;u`XLPj&6cY=2~HnyP2O
z@h_DvCTju1QI-)<dk?g!BtlD?^<?mguQkdlDLpR3k20xm^k`7QAl(n=UCX?ms^q%#
z=_8AopH-Xl6m(F4I~g0Vn`rPsSCF3Esb!4NjVEG{L#4u`${@;u+K^|FS_X)h?$|H*
zFMBpdS5gP`Gc+k@B~(3neIA}2Qp`EE-NvOgOOFC5jWmOp=(jyprNON3&JBfLXVq{A
z<+g2e`SAK7r9IbN)2~B*qb50BLfNGQ;#N*<)LQfQ<L{6!nIBkH11>iYi}Vd&^=%Ch
zN*T_IeB^G{ga{dPDgu3uYj@<U6I$d)=X6S1cOMhK0y^3ni_w?2kC)WLg}-#_zZPC>
zldb<0Fek!T3^*s?iHaRdJuXfBPy&<Eq38BRPSk4#kJ{^oy*tn9tLwtjK${XAK!3@;
zPS6<#gqhtH{yl)RahHd97q}H@`Ywk~^sC^f4(k%`i%V_=XQ90ani+Gh;;SS-M?jr^
zBr*$-5PcDEi)Y@qsJa3|Xbd7#5~L#TMqS=l^Kqu<=FIyQtPA<wV3TI}d%PsCkYPnm
zTZV3E$znZd@Exvr?T%zsboEv=CvBnDg-(Rvo))`mZA9RY2+P5V0CqR@h&McicXt;i
z#-Z4SP)k*w&C|>+?{%z?2$p1&UVDE`cv?)>^x4t%+ufYjY~I!3?m0X8<A>HdPa(54
zz#U+*<$`W&6tFr%M?7Obc4Dm~zHope1A4Vx%kW1R0i_`&x!U^d&Ok>y;?RRLPds8i
z20PYUozs52D`boq^W$Vfra??U>qD+jjjA|C$>FjZYw1CjUQbr|>kYb53*|+HfD`fo
z{d*%Y>qEe#G__<qZlMo9Vil&BHZ0tS4?$h^fK<$nfwPM@;)Rs~@lx_|hSgKo%A~*_
zrmK5z6;i-jjUFn@TPJ7Xgu$GY*x}R`KMLqIw|9W3d+Kg1Q|LGKD#>6^&t2M=Py`-u
zU!$Nrudojs_J_3liJNr1AkIZ1rJpFlC!uKnJz$nx8}`p{>N{_Ai2Ul#iAu%@MsiKz
zHAcb0Uc%l>R%ec&{JKiqOE8$ui`RPr@8e$zg2|y_f4HQ83ubkcM@+uon|s@{84}0S
z_@=KUhW3XyH!OXMbd(s4F8Q|O(zwz|3k#Xz4<+B?rvNX}Lv8%{I!!*fz;6+_K*{{z
zst;?SvSqNF_q1`LS&~#qyFh%$<BMkQ1VDqZt`f}yK%9(|>TSb_3bs7?a;3WC)i>dX
zZdh(n;@b+?m)og3oDsK^{Qya%_x`TzxJzN8{XKc@hEEIrRX<)zK$d&v8e7H)*<)=B
zXT)<0+u4LjW~rb2$z(-`%jp_`f+$zb6S8RrR(6+Qs&!<gq*)0ne0mNA`xP;CFuK^a
z>Jdwj<Y|t5at7(!+kZT*dfmEpy-A0<qSK_V+oDIB4!0UC^|E{w)M`T~vb>UvTcWyz
z6_j(B$DU-R&m=U=zuxHIS(xhT(Sdnowj~M=4s=ymQl&c2LeDlwj|p7jEU#(jd6bh8
za^H2>+YY9kiA>(2Bkk=%!~v5oM<O6lI$6zPm@8*e%ApG<jt{e%3FxaDLyX)sFnx9X
zmwIkFdNS~oHSqZUIczF%gH;Q|Wot)tQd+%K*dR}p)`zFA)EceH6e(jHYcz?u*D_+F
zVX>tC-3{TAlY0#=*<_F3Fm0i9JK7O08e1j;nlLC5)2-T`J^jOlHDHr!cGac}!I)h(
zHmeRNf}By8FC+25@o)#Ajo*~CvOC9Q&DbM=Pf0pH@%(BtYi<0@CDiGshj=5(_C)Tz
zw-&oALIBkyIc~fAQr@i&sR(foKou&RC2l%x^F(%N)b+)UcvaQr_)nOa{Crr&RKD5E
zQ(OFO7I81u)K}W(pqA#9^qv<aY%lN1%io{_!jy;apx6=Z-1cq}W#Pgyfl#8VRntk2
zgd0kE6IMB;1BH7pnnnblc+0-Slxa6niY2Kv;q3U{DJY5|dA<6E{jd>~8+`(VlI0U@
z1J|TA`5bhKO4DRPO}#Pafd>0!?cW%p^NyZ4#eN8ggUJNTrqrxrUvOu^yq#A9+1=&!
e6QXl5M|<@J;ajtk!GD;CneX0XQi9rd@P7b99exG?

literal 0
HcmV?d00001

diff --git a/docs/configuration/tpu.md b/docs/configuration/tpu.md
new file mode 100644
index 0000000000000..005b7f78f4407
--- /dev/null
+++ b/docs/configuration/tpu.md
@@ -0,0 +1,104 @@
+# TPU Optimization Tips
+
+This doc serves as a collection of handy tips for optimizing your vLLM on TPU workload.
+
+## Get started
+
+Looking for setup and installation instructions? Find them [here](../getting_started/installation/google_tpu.md).
+
+### TPU workload sizing
+
+When selecting the ideal number of chips for a single serving instance, it's important to account for both the model size and the average request context length. Adequate HBM for the KV cache is essential to ensure a sufficient number of concurrent requests can be processed.
+
+The following colab [calculator](https://colab.research.google.com/github/ericehanley/rightsize-vllm/blob/main/HBM_Calculator.ipynb) will tell you:
+
+- KV cache size requirement per token and per request
+- TPU/GPU memory consumed by the model weights
+- TPU/GPU memory allocated for the KV cache
+- Approximate maximum number of requests you can set (`--max-num-seqs`)
+
+This approach serves as a general rule of thumb.
+
+#### Latency-throughput tradeoff
+
+As with rightsizing the number of chips for your workload, consider adjusting `--max-num-seqs` to fine-tune the latency-throughput balance. Decreasing `--max-num-seqs` and/or increasing the number of chips can help reduce latency.
+
+`--max-num-seqs` defines the number of concurrent decode slots, effectively limiting the number of requests the server can process tokens for simultaneously. Increasing this value allows the server to pre-allocate more HBM to handle a higher number of concurrent requests, which can maximize overall throughput. However, this often increases the end-to-end (e2e) latency per request.
+
+Therefore, carefully tuning `--max-num-seqs` is crucial to achieving the desired balance between latency and throughput for your specific workload.
+
+In a similar way, `--max-num-batched-tokens` can be adjusted down to improve latency, or adjusted up to improve throughput.
+
+#### Compilation and Caching
+
+Coming from a GPU background, one of the key differences you'll notice with TPUs is an initial compilation step. TPUs are specialized accelerators (ASICs) that achieve maximum performance by executing pre-compiled, static computation graphs via the XLA compiler. Unlike GPUs, which can handle dynamic input shapes more flexibly, TPUs require a specific compiled graph for each tensor shape (e.g., batch size and sequence length) they process.
+
+To manage this, vLLM performs a one-time "warmup" process when you first launch the server. During this phase, it pre-compiles the model for various common input shapes and saves these compiled graphs to a cache on disk or remote storage (located at `~/.cache/vllm/xla_cache` by default). This process can take anywhere from a few minutes to an hour, depending on the size of the model and the context length used.
+
+Although the first compilation can take some time, for all subsequent server launches, vLLM can load these graphs directly from the cache, eliminating the compilation time for future runs.
+
+Use the `VLLM_XLA_CACHE_PATH` environment variable to write the cache to shareable storage so that nodes deployed later (for example, when autoscaling) can reuse it.
+
+#### Reducing compilation time
+This initial compilation time varies significantly and is impacted by many of the arguments discussed in this optimization doc. Factors that influence compilation time include model size and `--max-num-batched-tokens`. You can also tune other settings, such as `VLLM_TPU_MOST_MODEL_LEN`.
+
+### Optimize based on your data
+
+#### max model len vs. most model len
+
+![most_model_len](../assets/design/v1/tpu/most_model_len.png)
+
+If most of your requests are shorter than the maximum model length but you still need to accommodate occasional longer requests, setting a high maximum model length can negatively impact performance. In these cases, you can try introducing most model len by specifying the `VLLM_TPU_MOST_MODEL_LEN` environment variable.
+
+For example, suppose 1% of requests are 32k tokens long and 99% of requests are 2k tokens long. You can set `--max-model-len 32768` and use `VLLM_TPU_MOST_MODEL_LEN=2048`.
+
+The requests get subdivided into max-model-len and most-model-len categories. For the latter category, we can gain better performance since the server can process more requests at a time.
+
+#### Padding
+
+For online serving with latency requirements, consider switching to bucket padding by setting the `VLLM_TPU_BUCKET_PADDING_GAP` environment variable. Because of the layout of the TPU, try using increments of 128: 128, 256, etc.
+
+The server pads the requests into fixed lengths before sending them to the model to avoid recompilation. To read more about TPU padding, see [here](https://cloud.google.com/tpu/docs/performance-guide#xla-efficiencies). Currently, there are 2 ways to pad the requests:
+
+1) the default exponential padding (pad to the nearest power of 2)
+2) bucket padding (pad to the nearest linearly increasing bucket).
+
+When using bucket padding, the buckets start from 16, end at max_model_len, and increment by `VLLM_TPU_BUCKET_PADDING_GAP`.
+
+For example, with max_model_len=512 and padding_gap=64, the buckets will be [16, 32, 64, 128, 192, 256, 320, 384, 448, 512].
+
+The fewer tokens we pad, the less unnecessary computation the TPU does and the better the performance we can get. For example, if num_tokens=300, exponential padding pads to 512, whereas the bucket padding above pads to 320.
+
+However, you need to choose the padding gap carefully. If the gap is too small, the number of buckets is large, leading to increased warmup (precompile) time and more memory needed to store the compiled graphs. Too many compiled graphs may lead to HBM OOM. Conversely, an overly large gap yields no performance improvement compared to the default exponential padding.
+
+**If possible, use the precision that matches the chip’s hardware acceleration**
+
+- v5e has int4/int8 hardware acceleration in the MXU
+- v6e has int4/int8 hardware acceleration in the MXU
+
+Supported quantized formats and features in vLLM on TPU [Jul '25]
+- INT8 W8A8
+- INT8 W8A16
+- FP8 KV cache
+- [WIP] FP8 W8A8
+- [WIP] AWQ
+- [WIP] FP4 W4A8
+
+**Don't set TP to be less than the number of chips on a single-host deployment**
+
+Although it’s common to do this with GPUs, don't try to fragment 2 or 8 different workloads across 8 chips on a single host. If you need 1 or 4 chips, just create an instance with 1 or 4 chips (these are partial-host machine types).
+
+### Tune your workloads!
+
+Although we try to have great default configs, we strongly recommend you check out the [vLLM auto-tuner](../../benchmarks/auto_tune/README.md) to optimize your workloads for your use case.
+
+### Future Topics We'll Cover
+
+#### Profiling
+
+The auto-tuner provides a profile of optimized configurations as its final step. However, interpreting this profile can be challenging for new users. We plan to expand this section in the future with more detailed guidance. In the meantime, you can learn how to collect a TPU profile using vLLM's native profiling tools [here](../examples/offline_inference/profiling_tpu.md). This profile can provide valuable insights into your workload's performance.
+
+#### SPMD
+More details to come.
+
+**Want us to cover something that isn't listed here? Open up an issue please and cite this doc. We'd love to hear your questions or tips.**

From ad341c519457fa706c549c9b7edc8438c35fd8d1 Mon Sep 17 00:00:00 2001
From: Wenhua Cheng <wenhua.cheng@intel.com>
Date: Tue, 29 Jul 2025 22:26:31 +0800
Subject: [PATCH 018/224] [Bugfix]fix mixed bits and visual language model
 quantization in AutoRound (#21802)

Signed-off-by: Wenhua Cheng <wenhua.cheng@intel.com>
---
 .../layers/quantization/auto_round.py         | 155 +++++++++++++-----
 1 file changed, 116 insertions(+), 39 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index ea17cd56c9855..a9e967e608e96 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from fractions import Fraction
-from typing import Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 import torch
 
@@ -16,6 +16,9 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
 from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
 
+if TYPE_CHECKING:
+    from vllm.model_executor.models.utils import WeightsMapper
+
 logger = init_logger(__name__)
 
 
@@ -28,7 +31,13 @@ class AutoRoundConfig(QuantizationConfig):
     SUPPORTED_DTYPES = {"int"}
     SUPPORTED_FORMATS = {"auto_round:auto_gptq", "auto_round:auto_awq"}
     SUPPORTED_BACKENDS = {
-        "auto", "gptq", "gptq:marlin", "awq", "awq:marlin", "marlin", "ipex"
+        "auto",
+        "gptq",
+        "gptq:marlin",
+        "awq",
+        "awq:marlin",
+        "marlin",
+        "ipex",
     }
 
     def __init__(
@@ -109,26 +118,70 @@ class AutoRoundConfig(QuantizationConfig):
         )
 
     def get_layer_config(self, layer, layer_name: str):
-        # Priority: extra_config > block_name_to_quantize > type fallback
-        if self.extra_config and layer_name in self.extra_config:
-            cfg = self.extra_config[layer_name]
-            return cfg.get("bits", self.weight_bits), cfg.get(
-                "group_size", self.group_size), cfg.get("sym", self.sym)
 
-        quantized = True
+        def get_config(name: str, quantized: bool = True):
+            cfg = self.extra_config.get(name, {}) if self.extra_config else {}
+            return (
+                cfg.get("bits", self.weight_bits if quantized else 16),
+                cfg.get("group_size", self.group_size if quantized else -1),
+                cfg.get("sym", self.sym if quantized else True),
+            )
+
+        # 1. Exact match from config
+        if self.extra_config and layer_name in self.extra_config:
+            return get_config(layer_name)
+
+        # 2. Determine whether layer should be quantized
+        quantized = not isinstance(layer, ParallelLMHead)
         if self.block_name_to_quantize:
             quantized = any(
                 layer_name.startswith(name)
                 for name in self.block_name_to_quantize)
-        elif isinstance(layer, ParallelLMHead):
-            quantized = False
 
-        return (self.weight_bits, self.group_size,
-                self.sym) if quantized else (16, -1, True)
+        # 3. Handle fused MoE
+        if self.extra_config and "fusedmoe" in layer.__class__.__name__.lower(
+        ):
+            moe_configs = [
+                get_config(name, quantized) for name in self.extra_config
+                if name.startswith(layer_name)
+            ]
+            if moe_configs:
+                if len(set(moe_configs)) == 1:
+                    return moe_configs[0]
+                raise ValueError(f"Fused MoE layer '{layer_name}' requires "
+                                 f"consistent quant config for all sub-layers")
+
+        # 4. Handle fused QKV or other patterns
+        if self.extra_config:
+            for fusion_key, sub_keys in self.packed_modules_mapping.items():
+                if fusion_key in layer_name and layer_name.count(
+                        fusion_key) == 1:
+                    sub_names = [
+                        layer_name.replace(fusion_key, sub_key)
+                        for sub_key in sub_keys
+                    ]
+                    sub_configs = [
+                        get_config(name, quantized) for name in sub_names
+                    ]
+                    if len(set(sub_configs)) == 1:
+                        return sub_configs[0]
+                    raise ValueError(
+                        f"Fused module '{layer_name}' requires "
+                        f"consistent quant config for {sub_names}")
+
+        # 5. Fallback
+        return get_config(layer_name, quantized)
 
     def check_quantized(self, weight_bits: int) -> bool:
         return weight_bits < 16
 
+    def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
+        if self.block_name_to_quantize is not None:
+            self.block_name_to_quantize = hf_to_vllm_mapper.apply_list(
+                self.block_name_to_quantize)
+        if self.extra_config is not None:
+            self.extra_config = hf_to_vllm_mapper.apply_dict(self.extra_config)
+
     def apply_awq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
         from vllm.model_executor.layers.fused_moe import FusedMoE
         from vllm.model_executor.layers.quantization.utils.marlin_utils import (
@@ -141,9 +194,14 @@ class AutoRoundConfig(QuantizationConfig):
             else:
                 return None
 
-        logger.debug("[%s] Type: %s, Bits: %s, Group Size: %s, Sym: %s",
-                     prefix, layer.__class__.__name__, weight_bits, group_size,
-                     sym)
+        logger.debug(
+            "[%s] Type: %s, Bits: %s, Group Size: %s, Sym: %s",
+            prefix,
+            layer.__class__.__name__,
+            weight_bits,
+            group_size,
+            sym,
+        )
         if backend == "auto" or "marlin" in backend:
             AWQ_TYPE_MAP = {
                 4: scalar_types.uint4,
@@ -162,15 +220,19 @@ class AutoRoundConfig(QuantizationConfig):
         if use_marlin:
             from vllm.model_executor.layers.quantization.awq_marlin import (
                 AWQMarlinConfig, AWQMarlinLinearMethod, AWQMoEMethod)
-            quant_args_marlin = AWQMarlinConfig(weight_bits=weight_bits,
-                                                group_size=group_size,
-                                                zero_point=not sym,
-                                                lm_head_quantized=False,
-                                                full_config={},
-                                                modules_to_not_convert=[])
+
+            quant_args_marlin = AWQMarlinConfig(
+                weight_bits=weight_bits,
+                group_size=group_size,
+                zero_point=not sym,
+                lm_head_quantized=False,
+                full_config={},
+                modules_to_not_convert=[],
+            )
         else:
             from vllm.model_executor.layers.quantization.awq import (
                 AWQConfig, AWQLinearMethod)
+
             quant_args = AWQConfig(
                 weight_bits=weight_bits,
                 group_size=group_size,
@@ -182,6 +244,7 @@ class AutoRoundConfig(QuantizationConfig):
                 return AWQMoEMethod(quant_args_marlin)
             from vllm.model_executor.layers.quantization.moe_wna16 import (
                 MoeWNA16Config)
+
             config = {
                 "quant_method": "awq",
                 "bits": weight_bits,
@@ -206,6 +269,7 @@ class AutoRoundConfig(QuantizationConfig):
         from vllm.model_executor.layers.fused_moe import FusedMoE
         from vllm.model_executor.layers.quantization.utils.marlin_utils import (
             check_marlin_supported, check_moe_marlin_supports_layer)
+
         weight_bits, group_size, sym = self.get_layer_config(layer, prefix)
         if not self.check_quantized(weight_bits):
             if isinstance(layer, (LinearBase, ParallelLMHead)):
@@ -213,19 +277,24 @@ class AutoRoundConfig(QuantizationConfig):
             else:
                 return None
 
-        logger.debug("[%s] Type: %s, Bits: %s, Group Size: %s, Sym: %s",
-                     prefix, layer.__class__.__name__, weight_bits, group_size,
-                     sym)
+        logger.debug(
+            "[%s] Type: %s, Bits: %s, Group Size: %s, Sym: %s",
+            prefix,
+            layer.__class__.__name__,
+            weight_bits,
+            group_size,
+            sym,
+        )
         if backend == "auto" or "marlin" in backend:
             GPTQ_TYPE_MAP = {
                 (4, True): scalar_types.uint4b8,
                 (8, True): scalar_types.uint8b128,
             }
-            use_marlin = ((weight_bits, sym) in GPTQ_TYPE_MAP
-                          and check_marlin_supported(
+            use_marlin = (weight_bits,
+                          sym) in GPTQ_TYPE_MAP and check_marlin_supported(
                               GPTQ_TYPE_MAP[(weight_bits, sym)],
                               group_size,
-                              has_zp=not sym))
+                              has_zp=not sym)
             if isinstance(layer, FusedMoE):
                 use_marlin = use_marlin and check_moe_marlin_supports_layer(
                     layer, group_size)
@@ -234,26 +303,33 @@ class AutoRoundConfig(QuantizationConfig):
         if use_marlin:
             from vllm.model_executor.layers.quantization.gptq_marlin import (
                 GPTQMarlinConfig, GPTQMarlinLinearMethod, GPTQMarlinMoEMethod)
-            quant_args_marlin = GPTQMarlinConfig(weight_bits=weight_bits,
-                                                 group_size=group_size,
-                                                 is_sym=sym,
-                                                 lm_head_quantized=False,
-                                                 desc_act=False,
-                                                 dynamic={},
-                                                 full_config={})
+
+            quant_args_marlin = GPTQMarlinConfig(
+                weight_bits=weight_bits,
+                group_size=group_size,
+                is_sym=sym,
+                lm_head_quantized=False,
+                desc_act=False,
+                dynamic={},
+                full_config={},
+            )
         else:
             from vllm.model_executor.layers.quantization.gptq import (
                 GPTQConfig, GPTQLinearMethod)
-            quant_args = GPTQConfig(weight_bits=weight_bits,
-                                    group_size=group_size,
-                                    lm_head_quantized=False,
-                                    desc_act=False,
-                                    dynamic={})
+
+            quant_args = GPTQConfig(
+                weight_bits=weight_bits,
+                group_size=group_size,
+                lm_head_quantized=False,
+                desc_act=False,
+                dynamic={},
+            )
 
         if isinstance(layer, FusedMoE):
             if use_marlin:
                 from vllm.model_executor.layers.quantization.moe_wna16 import (
                     MoeWNA16Config)
+
                 config = {
                     "quant_method": "gptq",
                     "bits": weight_bits,
@@ -282,6 +358,7 @@ class AutoRoundConfig(QuantizationConfig):
                 return None
         from vllm.model_executor.layers.quantization.ipex_quant import (
             IPEXAWQLinearMethod, IPEXConfig, IPEXGPTQLinearMethod)
+
         if isinstance(layer, (LinearBase, ParallelLMHead)):
             if "awq" in self.packing_format:
                 config = IPEXConfig(method="awq",

From 58b11b24a69f0d5fc48f3a6ce8291e8d92af26e2 Mon Sep 17 00:00:00 2001
From: elvischenv <219235043+elvischenv@users.noreply.github.com>
Date: Tue, 29 Jul 2025 22:34:00 +0800
Subject: [PATCH 019/224] [Bugfix] Fix workspace buffer None issue for
 Flashinfer TRTLLM Backend (#21525)

Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com>
---
 .../kernels/benchmark_trtllm_attention.py     | 42 ++++++++++++-------
 ...test_flashinfer_trtllm_decode_attention.py | 16 ++++---
 vllm/attention/backends/flashinfer.py         | 15 +++++--
 vllm/v1/attention/backends/flashinfer.py      | 28 ++++++-------
 4 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/benchmarks/kernels/benchmark_trtllm_attention.py b/benchmarks/kernels/benchmark_trtllm_attention.py
index 8c980f930366c..68c48858e61cc 100644
--- a/benchmarks/kernels/benchmark_trtllm_attention.py
+++ b/benchmarks/kernels/benchmark_trtllm_attention.py
@@ -71,22 +71,20 @@ def benchmark_decode(
     if kv_cache_dtype.startswith("fp8"):
         kv_cache, _ = to_float8(kv_cache)
 
+    output_trtllm = torch.empty(q.shape, dtype=dtype)
+
     # Benchmark TRT decode
     def trt_decode():
         return flashinfer.decode.trtllm_batch_decode_with_kv_cache(
             q,
             kv_cache,
             workspace_buffer,
-            num_qo_heads,
-            num_kv_heads,
-            sm_scale,
             block_tables,
             kv_lens_tensor,
-            page_size,
             max_kv_len,
-            kv_cache_dtype,
-            k_scale,
-            v_scale,
+            bmm1_scale=k_scale * sm_scale,
+            bmm2_scale=v_scale,
+            out=output_trtllm,
         )
 
     def time_fn(fn, warmup=10, trials=20):
@@ -125,6 +123,8 @@ def benchmark_decode(
     kv_indices = torch.tensor(kv_indices, dtype=torch.int32)
     kv_last_page_lens = torch.tensor(kv_last_page_lens, dtype=torch.int32)
 
+    output_baseline = torch.empty(q.shape, dtype=dtype)
+
     wrapper = flashinfer.BatchDecodeWithPagedKVCacheWrapper(
         workspace_buffer,
         kv_layout,
@@ -145,7 +145,7 @@ def benchmark_decode(
     )
 
     def baseline_decode():
-        return wrapper.run(q, kv_cache, sm_scale, k_scale, v_scale)
+        return wrapper.run(q, kv_cache, sm_scale, k_scale, v_scale, output_baseline)
 
     baseline_mean, baseline_std = time_fn(baseline_decode)
 
@@ -214,25 +214,39 @@ if __name__ == "__main__":
     max_seq_lens = [1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072]
     all_results = []
 
-    print("Running benchmark for kv_cache_dtype: bfloat16")
     print(
-        "\tnum_seqs\tmax_seq_len\ttrt_mean\ttrt_std\tbaseline_mean\tbaseline_std\tspeedup_percent"
+        "Running benchmark for q_dtype = bfloat16, kv_cache_dtype: bfloat16, "
+        "output_dtype: bfloat16"
+    )
+    print(
+        "\tnum_seqs\tmax_seq_len\ttrt_mean\ttrt_std\tbaseline_mean\t"
+        "baseline_std\tspeedup_percent"
     )
     for max_seq_len in max_seq_lens:
         for bs in num_seqs:
             result = benchmark_decode(
-                bs, max_seq_len, dtype=torch.bfloat16, kv_cache_dtype="auto"
+                bs,
+                max_seq_len,
+                dtype=torch.bfloat16,
+                kv_cache_dtype="auto",
             )
             all_results.append(result)
 
-    print("Running benchmark for q_dtype = bfloat16, kv_cache_dtype: fp8")
     print(
-        "\tnum_seqs\tmax_seq_len\ttrt_mean\ttrt_std\tbaseline_mean\tbaseline_std\tspeedup_percent"
+        "Running benchmark for q_dtype = bfloat16, kv_cache_dtype: fp8, "
+        "output_dtype: bfloat16"
+    )
+    print(
+        "\tnum_seqs\tmax_seq_len\ttrt_mean\ttrt_std\tbaseline_mean\t"
+        "baseline_std\tspeedup_percent"
     )
     for max_seq_len in max_seq_lens:
         for bs in num_seqs:
             result = benchmark_decode(
-                bs, max_seq_len, dtype=torch.bfloat16, kv_cache_dtype="fp8"
+                bs,
+                max_seq_len,
+                dtype=torch.bfloat16,
+                kv_cache_dtype="fp8",
             )
             all_results.append(result)
 
diff --git a/tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py b/tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py
index 96eee13695a9d..2e2130fab6a21 100644
--- a/tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py
+++ b/tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py
@@ -113,27 +113,25 @@ def test_flashinfer_trtllm_decode_with_baseline(
                  kv_data_type=dtype,
                  logits_soft_cap=soft_cap)
 
-    output = wrapper.run(query, key_value_cache, scale)
+    output = torch.empty(query.shape, dtype=dtype)
+    wrapper.run(query, key_value_cache, scale, out=output)
 
     # TRTLLM Decode
     max_kv_len = max(kv_lens)
     kv_lens_tensor = torch.tensor(kv_lens,
                                   dtype=torch.int,
                                   device=query.device)
-    output_trtllm = flashinfer.decode.trtllm_batch_decode_with_kv_cache(
+    output_trtllm = torch.empty(query.shape, dtype=dtype)
+    flashinfer.decode.trtllm_batch_decode_with_kv_cache(
         query.contiguous(),
         key_value_cache,
         workspace_buffer,
-        num_query_heads,
-        num_kv_heads,
-        scale,
         block_tables,
         kv_lens_tensor,
-        block_size,
         max_kv_len,
-        "auto",
-        k_scale,
-        v_scale,
+        bmm1_scale=k_scale * scale,
+        bmm2_scale=v_scale,
+        out=output_trtllm,
     )
 
     torch.testing.assert_close(output, output_trtllm, atol=1e-2, rtol=1e-2), \
diff --git a/vllm/attention/backends/flashinfer.py b/vllm/attention/backends/flashinfer.py
index e6e60e7562482..824ff8cca201a 100644
--- a/vllm/attention/backends/flashinfer.py
+++ b/vllm/attention/backends/flashinfer.py
@@ -1104,7 +1104,12 @@ class FlashInferImpl(AttentionImpl):
         window_left = window_size[0] if window_size is not None else -1
 
         prefill_output: Optional[torch.Tensor] = None
-        decode_output: Optional[torch.Tensor] = None
+        if num_decode_tokens > 0:
+            decode_output = torch.empty(decode_query.shape,
+                                        dtype=decode_query.dtype,
+                                        device=decode_query.device)
+        else:
+            decode_output = None
         stride_order = FlashInferBackend.get_kv_cache_stride_order()
         if prefill_meta := attn_metadata.prefill_metadata:
             # We will use flash attention for prefill
@@ -1155,17 +1160,18 @@ class FlashInferImpl(AttentionImpl):
                     num_decode_tokens, attn_metadata.max_decode_seq_len,
                     kv_cache_dtype, attn_metadata.num_qo_heads,
                     attn_metadata.num_kv_heads, attn_metadata.head_dim):
-                decode_output = decode_meta.decode_wrapper.run(
+                decode_meta.decode_wrapper.run(
                     decode_query,
                     kv_cache.permute(*stride_order),
                     k_scale=layer._k_scale_float,
                     v_scale=layer._v_scale_float,
+                    out=decode_output,
                 )
             else:
                 workspace_buffer = (
-                    decode_meta.decode_wrapper._int_workspace_buffer)
+                    decode_meta.decode_wrapper._float_workspace_buffer)
                 assert FlashInferState.get_kv_cache_layout() == "HND"
-                decode_output = trtllm_batch_decode_with_kv_cache(
+                trtllm_batch_decode_with_kv_cache(
                     query=decode_query,
                     kv_cache=kv_cache.permute(*stride_order),
                     workspace_buffer=workspace_buffer,
@@ -1174,6 +1180,7 @@ class FlashInferImpl(AttentionImpl):
                     max_seq_len=attn_metadata.max_decode_seq_len,
                     bmm1_scale=layer._k_scale_float * softmax_scale,
                     bmm2_scale=layer._v_scale_float,
+                    out=decode_output,
                 )
 
         if prefill_output is None and decode_output is not None:
diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index b72745ef156eb..775780807eae2 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -194,7 +194,6 @@ class FlashInferMetadata:
     max_seq_len: int
     seq_lens: torch.Tensor
     block_table_tensor: torch.Tensor
-    workspace_buffer: torch.Tensor
 
     # For handling prefill decode split
     num_decodes: int
@@ -473,7 +472,6 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
             max_seq_len=max_seq_len,
             seq_lens=seq_lens,
             block_table_tensor=block_table_tensor,
-            workspace_buffer=self._get_workspace_buffer(),
         )
 
         self._plan(num_prefills, num_decodes, attn_metadata)
@@ -641,11 +639,11 @@ class FlashInferImpl(AttentionImpl):
         if decode_wrapper := attn_metadata.decode_wrapper:
             decode_query = query[:num_decode_tokens]
             assert decode_query.shape[0] == num_decode_tokens
+            assert decode_wrapper is not None
             if not FlashInferBackend.use_trtllm_decode_attention(
                     attn_metadata.num_decodes, attn_metadata.max_seq_len,
                     self.kv_cache_dtype, attn_metadata.num_qo_heads,
                     attn_metadata.num_kv_heads, attn_metadata.head_dim):
-                assert decode_wrapper is not None
                 assert decode_wrapper._window_left == window_left
                 assert decode_wrapper._logits_soft_cap == (self.logits_soft_cap
                                                            or 0.0)
@@ -666,22 +664,24 @@ class FlashInferImpl(AttentionImpl):
                                                                            num_decode_tokens]
                     seq_lens_decode = attn_metadata.seq_lens[:
                                                              num_decode_tokens]
+                    workspace_buffer = decode_wrapper._float_workspace_buffer
 
                     assert get_kv_cache_layout() == "HND"
                     assert decode_query.is_contiguous()
                     assert kv_cache_permute.is_contiguous()
                     assert block_tables_decode.is_contiguous()
                     assert seq_lens_decode.is_contiguous()
+                    assert workspace_buffer.is_contiguous()
 
-                    output[:num_decode_tokens] = (
-                        trtllm_batch_decode_with_kv_cache(
-                            query=decode_query,
-                            kv_cache=kv_cache_permute,
-                            workspace_buffer=attn_metadata.workspace_buffer,
-                            block_tables=block_tables_decode,
-                            seq_lens=seq_lens_decode,
-                            max_seq_len=attn_metadata.max_seq_len,
-                            bmm1_scale=layer._k_scale_float * self.scale,
-                            bmm2_scale=layer._v_scale_float,
-                        ))
+                    trtllm_batch_decode_with_kv_cache(
+                        query=decode_query,
+                        kv_cache=kv_cache_permute,
+                        workspace_buffer=workspace_buffer,
+                        block_tables=block_tables_decode,
+                        seq_lens=seq_lens_decode,
+                        max_seq_len=attn_metadata.max_seq_len,
+                        bmm1_scale=layer._k_scale_float * self.scale,
+                        bmm2_scale=layer._v_scale_float,
+                        out=output[:num_decode_tokens],
+                    )
         return output_padded

From 37f86d90489dd47b3f9ac4dba8cd38d5907b016f Mon Sep 17 00:00:00 2001
From: David Xia <david@davidxia.com>
Date: Tue, 29 Jul 2025 13:32:06 -0400
Subject: [PATCH 020/224] [Docs] use `uv` in GPU installation docs (#20277)

Signed-off-by: David Xia <david@davidxia.com>
---
 .../installation/gpu/cuda.inc.md              | 84 ++++++++++---------
 1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/docs/getting_started/installation/gpu/cuda.inc.md b/docs/getting_started/installation/gpu/cuda.inc.md
index 5298c22c8435e..69a9842e4719b 100644
--- a/docs/getting_started/installation/gpu/cuda.inc.md
+++ b/docs/getting_started/installation/gpu/cuda.inc.md
@@ -20,16 +20,16 @@ Therefore, it is recommended to install vLLM with a **fresh new** environment. I
 # --8<-- [end:set-up-using-python]
 # --8<-- [start:pre-built-wheels]
 
-You can install vLLM using either `pip` or `uv pip`:
-
 ```bash
-# Install vLLM with CUDA 12.8.
-# If you are using pip.
-pip install vllm --extra-index-url https://download.pytorch.org/whl/cu128
-# If you are using uv.
 uv pip install vllm --torch-backend=auto
 ```
 
+??? console "pip"
+    ```bash
+    # Install vLLM with CUDA 12.8.
+    pip install vllm --extra-index-url https://download.pytorch.org/whl/cu128
+    ```
+
 We recommend leveraging `uv` to [automatically select the appropriate PyTorch index at runtime](https://docs.astral.sh/uv/guides/integration/pytorch/#automatic-backend-selection) by inspecting the installed CUDA driver version via `--torch-backend=auto` (or `UV_TORCH_BACKEND=auto`). To select a specific backend (e.g., `cu126`), set `--torch-backend=cu126` (or `UV_TORCH_BACKEND=cu126`). If this doesn't work, try running `uv self update` to update `uv` first.
 
 !!! note
@@ -50,36 +50,22 @@ uv pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VE
 
 LLM inference is a fast-evolving field, and the latest code may contain bug fixes, performance improvements, and new features that are not released yet. To allow users to try the latest code without waiting for the next release, vLLM provides wheels for Linux running on a x86 platform with CUDA 12 for every commit since `v0.5.3`.
 
-##### Install the latest code using `pip`
-
-```bash
-pip install -U vllm \
-    --pre \
-    --extra-index-url https://wheels.vllm.ai/nightly
-```
-
-`--pre` is required for `pip` to consider pre-released versions.
-
-Another way to install the latest code is to use `uv`:
-
 ```bash
 uv pip install -U vllm \
     --torch-backend=auto \
     --extra-index-url https://wheels.vllm.ai/nightly
 ```
 
-##### Install specific revisions using `pip`
+??? console "pip"
+    ```bash
+    pip install -U vllm \
+        --pre \
+        --extra-index-url https://wheels.vllm.ai/nightly
+    ```
 
-If you want to access the wheels for previous commits (e.g. to bisect the behavior change, performance regression), due to the limitation of `pip`, you have to specify the full URL of the wheel file by embedding the commit hash in the URL:
+    `--pre` is required for `pip` to consider pre-released versions.
 
-```bash
-export VLLM_COMMIT=33f460b17a54acb3b6cc0b03f4a17876cff5eafd # use full commit hash from the main branch
-pip install https://wheels.vllm.ai/${VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
-```
-
-Note that the wheels are built with Python 3.8 ABI (see [PEP 425](https://peps.python.org/pep-0425/) for more details about ABI), so **they are compatible with Python 3.8 and later**. The version string in the wheel file name (`1.0.0.dev`) is just a placeholder to have a unified URL for the wheels, the actual versions of wheels are contained in the wheel metadata (the wheels listed in the extra index url have correct versions). Although we don't support Python 3.8 any more (because PyTorch 2.5 dropped support for Python 3.8), the wheels are still built with Python 3.8 ABI to keep the same wheel name as before.
-
-##### Install specific revisions using `uv`
+##### Install specific revisions
 
 If you want to access the wheels for previous commits (e.g. to bisect the behavior change, performance regression), you can specify the commit hash in the URL:
 
@@ -92,17 +78,35 @@ uv pip install vllm \
 
 The `uv` approach works for vLLM `v0.6.6` and later and offers an easy-to-remember command. A unique feature of `uv` is that packages in `--extra-index-url` have [higher priority than the default index](https://docs.astral.sh/uv/pip/compatibility/#packages-that-exist-on-multiple-indexes). If the latest public release is `v0.6.6.post1`, `uv`'s behavior allows installing a commit before `v0.6.6.post1` by specifying the `--extra-index-url`. In contrast, `pip` combines packages from `--extra-index-url` and the default index, choosing only the latest version, which makes it difficult to install a development version prior to the released version.
 
+??? note "pip"
+    If you want to access the wheels for previous commits (e.g. to bisect a behavior change or
+    performance regression), due to a limitation of `pip`, you have to specify the full URL of the
+    wheel file by embedding the commit hash in the URL:
+
+    ```bash
+    export VLLM_COMMIT=33f460b17a54acb3b6cc0b03f4a17876cff5eafd # use full commit hash from the main branch
+    pip install https://wheels.vllm.ai/${VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
+    ```
+
+    Note that the wheels are built with Python 3.8 ABI (see [PEP
+    425](https://peps.python.org/pep-0425/) for more details about ABI), so **they are compatible
+    with Python 3.8 and later**. The version string in the wheel file name (`1.0.0.dev`) is just a
+    placeholder to have a unified URL for the wheels; the actual versions of wheels are contained in
+    the wheel metadata (the wheels listed in the extra index URL have correct versions). Although we
+    don't support Python 3.8 any more (because PyTorch 2.5 dropped support for Python 3.8), the
+    wheels are still built with Python 3.8 ABI to keep the same wheel name as before.
+
 # --8<-- [end:pre-built-wheels]
 # --8<-- [start:build-wheel-from-source]
 
 #### Set up using Python-only build (without compilation)
 
-If you only need to change Python code, you can build and install vLLM without compilation. Using `pip`'s [`--editable` flag](https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs), changes you make to the code will be reflected when you run vLLM:
+If you only need to change Python code, you can build and install vLLM without compilation. Using `uv pip`'s [`--editable` flag](https://docs.astral.sh/uv/pip/packages/#editable-packages), changes you make to the code will be reflected when you run vLLM:
 
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-VLLM_USE_PRECOMPILED=1 pip install --editable .
+VLLM_USE_PRECOMPILED=1 uv pip install --editable .
 ```
 
 This command will do the following:
@@ -121,7 +125,7 @@ In case you see an error about wheel not found when running the above command, i
 ```bash
 export VLLM_COMMIT=72d9c316d3f6ede485146fe5aabd4e61dbc59069 # use full commit hash from the main branch
 export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/${VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
-pip install --editable .
+uv pip install --editable .
 ```
 
 You can find more information about vLLM's wheels in [install-the-latest-code][install-the-latest-code].
@@ -137,7 +141,7 @@ If you want to modify C++ or CUDA code, you'll need to build vLLM from source. T
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-pip install -e .
+uv pip install -e .
 ```
 
 !!! tip
@@ -152,14 +156,14 @@ pip install -e .
     The following environment variables can be set to configure the vLLM `sccache` remote: `SCCACHE_BUCKET=vllm-build-sccache SCCACHE_REGION=us-west-2 SCCACHE_S3_NO_CREDENTIALS=1`. We also recommend setting `SCCACHE_IDLE_TIMEOUT=0`.
 
 !!! note "Faster Kernel Development"
-    For frequent C++/CUDA kernel changes, after the initial `pip install -e .` setup, consider using the [Incremental Compilation Workflow](../../contributing/incremental_build.md) for significantly faster rebuilds of only the modified kernel code.
+    For frequent C++/CUDA kernel changes, after the initial `uv pip install -e .` setup, consider using the [Incremental Compilation Workflow](../../contributing/incremental_build.md) for significantly faster rebuilds of only the modified kernel code.
 
 ##### Use an existing PyTorch installation
 
-There are scenarios where the PyTorch dependency cannot be easily installed via pip, e.g.:
+There are scenarios where the PyTorch dependency cannot be easily installed with `uv`, e.g.:
 
 - Building vLLM with PyTorch nightly or a custom PyTorch build.
-- Building vLLM with aarch64 and CUDA (GH200), where the PyTorch wheels are not available on PyPI. Currently, only the PyTorch nightly has wheels for aarch64 with CUDA. You can run `pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124` to [install PyTorch nightly](https://pytorch.org/get-started/locally/), and then build vLLM on top of it.
+- Building vLLM with aarch64 and CUDA (GH200), where the PyTorch wheels are not available on PyPI. Currently, only the PyTorch nightly has wheels for aarch64 with CUDA. You can run `uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 torch torchvision torchaudio` to [install PyTorch nightly](https://pytorch.org/get-started/locally/) and then build vLLM on top of it.
 
 To build vLLM using an existing PyTorch installation:
 
@@ -167,8 +171,8 @@ To build vLLM using an existing PyTorch installation:
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
 python use_existing_torch.py
-pip install -r requirements/build.txt
-pip install --no-build-isolation -e .
+uv pip install -r requirements/build.txt
+uv pip install --no-build-isolation -e .
 ```
 
 ##### Use the local cutlass for compilation
@@ -179,7 +183,7 @@ To achieve this, you can set the environment variable VLLM_CUTLASS_SRC_DIR to po
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-VLLM_CUTLASS_SRC_DIR=/path/to/cutlass pip install -e .
+VLLM_CUTLASS_SRC_DIR=/path/to/cutlass uv pip install -e .
 ```
 
 ##### Troubleshooting
@@ -189,7 +193,7 @@ to be run simultaneously, via the environment variable `MAX_JOBS`. For example:
 
 ```bash
 export MAX_JOBS=6
-pip install -e .
+uv pip install -e .
 ```
 
 This is especially useful when you are building on less powerful machines. For example, when you use WSL it only [assigns 50% of the total memory by default](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#main-wsl-settings), so using `export MAX_JOBS=1` can avoid compiling multiple files simultaneously and running out of memory.
@@ -228,7 +232,7 @@ Simply disable the `VLLM_TARGET_DEVICE` environment variable before installing:
 
 ```bash
 export VLLM_TARGET_DEVICE=empty
-pip install -e .
+uv pip install -e .
 ```
 
 # --8<-- [end:build-wheel-from-source]

From f03e9cf2bbee0b18b83ffe2ed0e8ddbd589c9cc4 Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath <varunsundar08@gmail.com>
Date: Tue, 29 Jul 2025 23:02:30 +0530
Subject: [PATCH 021/224] [Doc] Add FusedMoE Modular Kernel Documentation
 (#21623)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
---
 .../fused_experts_blocks.png                  | Bin 0 -> 191037 bytes
 .../fused_moe_batched.png                     | Bin 0 -> 193655 bytes
 .../fused_moe_non_batched.png                 | Bin 0 -> 232056 bytes
 .../prepare_and_finalize_blocks.png           | Bin 0 -> 130810 bytes
 docs/design/fused_moe_modular_kernel.md       | 236 ++++++++++++++++++
 5 files changed, 236 insertions(+)
 create mode 100644 docs/assets/design/fused_moe_modular_kernel/fused_experts_blocks.png
 create mode 100644 docs/assets/design/fused_moe_modular_kernel/fused_moe_batched.png
 create mode 100644 docs/assets/design/fused_moe_modular_kernel/fused_moe_non_batched.png
 create mode 100644 docs/assets/design/fused_moe_modular_kernel/prepare_and_finalize_blocks.png
 create mode 100644 docs/design/fused_moe_modular_kernel.md

diff --git a/docs/assets/design/fused_moe_modular_kernel/fused_experts_blocks.png b/docs/assets/design/fused_moe_modular_kernel/fused_experts_blocks.png
new file mode 100644
index 0000000000000000000000000000000000000000..5721d5582c7f14d89e1bcd7defc58fe1669442e0
GIT binary patch
literal 191037
zcmeEv1zc3w`?n$(pjaRlimgZwjf5y*0E!3*5>kWYkTdiMiUA5X3?V8?hdA^QVt@hz
zN)N3FNDPQ{zvs@(MO1Wm-`$_<?tk~QGR(c_o_Nj^-zV<j(otoFC5&qs=ggV2<luq*
z$L7qTF9!dt7S0D(cHeR#%$ehuX)kxe-qI0iYGFKQ8)6^zw`~G^r%`tH+YtM=2?*$0
zTk{y1>YLc=TiWqh8QX(P;J&_zu@$X>63W@s!a{$WfV>b7AGozsN#D@a${uBRdK*F(
zd_QPqZ)^cR!DaCCh${GT0{r9W<KY*gwy;HSL+s@j5ar?91#T!98=9j)Z%`BPKR>u6
zZ)$6dL^*<slvdO!pp9+qOi@<USwau<h){3X8R}aYQ!j&uq1hUsY>kX<X)VDV1-2pN
zw(;!+ccA|f`)KO}U##>kX^$QSOXQ|KZ)!w+_QXLGZvKOErkZm559rIO8Xi<I()_wN
zxKB$*1zUaV(@H2K%Ib|AX)_W)2vR3y<V3w9A|^(?U}6i8LmL*lpla$2uZ1#k0X`E4
zQzK(LT5r@R?NKNTdsFK#FB+n(tc(q5n?&nT-_{m|`LY=jWkFjVt&KHU_!qrH7gWEY
zDRfoC)W{y}GQ=jLLbT~Y*AEz*nw*A52D?a|rKLXHn0CSLw7wAvLw%ldY38m_=?GKP
z7KH-+&DgY=x8X*Xj{A%)AXbCvNE-+6%^!buni{s#oc?FJJ7A~4cl404o{FWM9>(4h
zqb0AX#tnEAVo$Wbg#(S`)C+d@PA~`Cq8zM@jG-Ok2f~6mZEA0<YOQYwHOBz{0@qI4
zTUyX=12MMPhq6FXSsfv4Bq|~-2$~>KR`xTlBPsuZ>vs0GD0AZ(cMyAp1QF0cw22wp
z+8e`+OTA>R|Ct^X68(5nK5vbJG0M`|-qs1Mm)0Xfgho)BAOKmXUBG<iD>0a_PS4~i
zn5Fb-ylgVvIYiIjv)`Z%ja`1c07xf(#Z|LzC5>JFuWqHV2#*kzL1>#P3~%lidno|7
znz@?@c-#M$-IPUWD4U;lP(ENUuZM6vp`wa*M8SH0h8!9j0VPNiOO&nsX_N`dO5Z~6
z^Htf;k_t?U?B_O$C=?J=iujrt+uJ+QG`hZnJqom&A-zD)*gBm6P5A(o5VDl7n$TDH
z%YI03K7WHK4SoBLU<&Zrp&V=}ngX<#b(u8%W3O*(0t`DvkI&kUX0ad|P+R@~O1`bJ
zg}%KhdS)*GOZ*)|JnK4V)5pRVfHJa1rs!<~!Y0tK;FEF*QmR1NeC82h8wbcpQM4_$
z9mQ@#5QAUH8tfbN80|jX(f*@);6Al8ppK~heK`iWJj=+lT>8_K0Ta=;1bixMWnc$=
z?38z~Gd5B}$$=G^T3I_lOWkV-_73bH%E}G^wLdg*NG*S+lmM{&)({f_Yoc#qVQhhd
z;3XK<+St|<SP2SN`{Lfw&o_SC2s#?Wun96{UmHbzK0YK8y8gS3q6i<~zJ1_{?+O}#
zqF>s{AKR248ATDnFO8xA{C$QV<rky1nW;tjp<W>}|2pD?piXGUzW*z=C`~*5rWU2S
zC;xgaN~6aYQ0OmGnJNzYR`#aO6dm+k4fK_+`O+@`K?KdGFUBwON0f!Yei31SPJfcJ
zfWa{hbP2*pYNoP)f!G(y;%^1ev)W!7e4K@A{ukzsYI{MfLeI#0o4`JxHV^+vlLia`
zWc+8~IeiOL6DT}k2#_3P(PSYrVG11Sy|f#arbb4PpEs*T`;u*chUpQ7Ifcee)Bf2^
zZbHmrdf;onj^z>lzhQZ37UefB4?oR2{fVaM_aGn`2ko`B)raB&F#lMiK>W}S+QQ%8
z3*48b<5B2o&>JnvFddkpaka20je~zXN<i&%=1(XZ@KyhUqO@K3bNwTL4w9!5;`c0?
z(j0^s;kd_-pP{v;#4&!W<u~DuS+zB7&9i9hKkbR^wXhHX|G*CJH?_0Yw>LcfpQRTN
z5&kl~A_Q~sOx+B-3NtM|+zytlpBgEfm32g^x_K7X`Cl1&8^B9nS$fFg!)#?>3x4~;
zwg20_aR{F1QS%sIOr3{^=a0e^D)OF%w6iYzi*{cD>3;#j`3f%!!N_H%6u}ZRQ;J~8
z`gNp;mNWWEQnZ&ULZ6ZHFD^ebg@-!n|0L&k!YK&iOv8$46gBM&z$iy-I^aK3c0fj#
z5`&s4JFtxXI<f<M$$wX13V3{<gZrOj{eO9QABO$E?H|xWHb2onfXwf2XJtW~|KBX#
ztk?sl$dA<~2sT?f*c$_g!2Lsv@P)Db#?IOrTU!8cpVE)Ml@aI-M7L@E+MkAcr^MBP
zp96jYxgg;GfPV_W@3%HKQZTl(gj;}5B+3?K&%rO4)5f;Yqti{n?<gerLCNcb7`uak
zouRF%0n~=lCv_w{sH{PN&)5-6!Jg;$FX}4<@MTQmkA>3s4fOeqfk*fI<L-QX!g6~7
zkNlLlyC5Hes=8<jZaNM>Q@wr8a?Vt6{J$PL;0GHA?cI<09Oi>R@3QHcnCQVMTOSnu
zOh?pyd7J<yJ`-n8PyVmO3xArv@J7uF*{QvM<G_5OoW7Fde?F2I68X{}7J*}zGsShb
zG}$kRMrexXUynv;Amg{u$p4i@89$98Ki1M;B7&V7{7{kP4>iIUqW=vp(V!MmK?Rxi
zwx*z{l8>5=2Z4seRK=}49b=eP;e65kSDNKtj1WYGzEU~Dv_SGql>>+AzQ6~+Ua}4L
z(|*i(Fq8dh`=zABuZN<PkU(L$X&d{Q&%P6I`)&67hP&Z8|MsI_aqd6gCW^tf4Ti9@
z*+iIWXEG@K+%M#z{~t-<f!YEJPXA4t2x)wXVgLOe`nQ=SP<BSKc)$<jQ*r=_p|2$O
zUu<*u`Ctq?Ez>k3G{fl37O;jz{}&SJ@8^B~w0+sDG^=@`aE&|^XA<BOn8q~U*qX1{
z>7Q>1_@^ThG_Dbawf+pQ;fGn|7q<koM91Ic9YF=8KP6TREBN2G1T=T?UvCLuiu@R(
zey1Yv|EQ0`&j%X=n&gYYzQ;_-=bQC43|L3}IwnJaR<<+?2mA#lL*+l4G2>+{F~oF2
z??<cyhq`}lqX8krudF(xE>(Zh`#V3+XwYoIZyODoKlSf78Z?T`qB3Tfgs($t5IFoF
zh18V40HL1~Bh<jfAC6M|i+!E1<7=X@>oe1I!D0WI`Uf_7zmVbjeqi@c8!ja!f!Pe#
z56aiS(uDtf|3*kulm}*n9~mnlejb5;L_rEX%fHFV`FZb~(#+6r+bdeI;NNerXcYOe
zy;8IKTsksck^<rwGfWyB_nvWS=gh~Z)y9{vc>?Q1D?XV{0x40Als(GY{6BLAS?}2^
z;y}R&{4zTnWf7b9t7e)kVOm6Urpbb}*DqwUXtVsgRM1b0v(PllZ>k_TDe~`ELEjI|
z&QK=DQPv<k%r~7p-fK;*Zi7zX`TQ#Qj0e8)!2S$$HPAN{H9`O$`R2qDaGZ{y5I7#?
z=M;(YBc>4rP3Zag;5gMxnHQW*=4mOMUr6Tv-q`9-gB`Fa|F+E2;s*bEnTILzW7u)b
z*vJ8#A3_uFpX#grA_6b+)e9rwh$5P6H&fdAW^pn^X{_|?NIN*!>2C;%QOEq|)F@DN
z^jnA1AZQf)ZK?ggk`aL^GDB)Vzbv^AWock)1*G*WcmL0rT|Pcha9YZ2W|xmo2%L%n
z?m+R383%|>A1(IXeg7YxmGU|7Ia4}e=lTo${ma>6#P14Rg7*SH*HKx2?Vf$@5q`x9
z{~fyXJzq`)93Kh0WH7E1fK~rYUJ{&6lFa0$UoQX&pD{T@Q^02Y3y4mQLjUQQ<aD56
znmfNAL-?lrf;xxa;N3qHw6rw_hjHl}P)<~YR2J=c0dU-zknA?0{oo3878~`zF3K@&
zUpg69D4<opJVuIk>1XIT0hqbqKqvft23rciFSN~MN*Ln(LI8vYlYSCYVkmXa#zz0~
zBn1-ug?3j+2=R@&oYf76GdRCEU;eHbEUZ8Ow6c`@Y{O?U;MDGa+S@$;Xx%6Z>pj?J
z^TQWrD97n@@qTe8{I>4(8z^sVWdu%$p`4g(XrXUsXKD!S(u@PAzJ&V5j;8k1(?t0M
z1VKaSyCyh}3Y@zQ3Tvp}KfiJeZtjQ32pLGIC3pvR`Xol^+IMZ;Z~Aexh5h-M%8X5g
zvDwT$g|GeYGe&=W5)KsBga4o9*q`Ya&S<<heIN^LZ>e;l9To&3ae#3Cz<EZq3^{$I
zE<7ZxUg06Bp7Q_4kbn1SLWu8&p#QYGJp$f<wgu#uI|_ar)3-9WgEjd#Nc1az>Ca%!
z+1`!*QaryaP=BSNhB*Z0m|4K%Y-PZ(D8Y>N_a7RBnC-liulDPgRFD5QwI@syRT@VL
z^9k|@(DnxAmjAuxn{Cv8tn!0PD!*`x{$p*RA7M50lPo&??k3FI{9-?Ywc+xBAB_6X
zW$oDtxc?k$PoI$w;Z*30CI!+?ADI6c(Ly(;A?<gq?N7`u{8IQ;7*;zpzT+2wvBLjO
zjf0bssrvwrDG2Az;W7WYoDR4eV(h<AiT~A{{x3r!!f=2SW+nvuste3ae?1PFJ{0J?
zWBzj)c((l0ucfO0Z4CU428O+}pUA+|=>>Sqe=Y-mzvljbk@AwaML`F&p?-gKVfH*1
z_3A7${KMro|AKIZ2vibAv7@kp6@b&SGr$v!B4!5vU}FHY@!ub@fJ*?sKm>m*-zp&R
zoxdCeN3aaQFO-9e;Xl>`qPEZpgL0H}{_LjHvVc)#El`G_o_m@Jryreh?aSQ5SIYm-
zu-892%OD1;Wg1Tk!9aNiOA5l*X7VJg-+v*VoGps>T@CbmL$Baq%h~_H70g=Gy!M-H
z`Ulu^y6G3X^DCzP9CC#+9<v2yXQWTQ>m&Y!5!tWL=$Za&1T&av`mEiV%mgdFUx%55
zW^?X;A~PxJJ5i1~{g0`{U^g1JnzL1@{%u@ENiKco@3+E<r_X;WcV<$Kau@#Fywn@C
z2cW-KOiiY(%j#{OGlyf&!To!WJL-1TJJqR|HRSi|8AU%RJA+(-V|2J2viKVRW}MjO
z&HJ}*Gh9=6b=SV|J*~Z0oeK8c5L3O6JDzv$P|0X_VKyOy5aYIFEMxGaW}AcVhU6h#
z^Q<a|JJQ(+$(0E;bXAwn&!Jn$aps#JJ`0a;5p8Dk_~r&^{Nx=Sy~#M|GMPDZ>A(IF
zDSM{rawx;BeS(MPf8Cza$HMJTeDSB0_jt`R+EszMp7@*GqGfWkjJ9yQjP<IbOA)j6
zdJNpV{g}ZdW47hx(l0Fb6zgS~Wu7tx;NG#6CPlVyS0}>}Feg`&Z^s?kStfG;2ksTD
z3U1&0Ejbs`ujCMAYJILc+hlfudxuG$2%T@JDKp386rgo(up{GalQ9POcAc~i_nC!m
z%M{NvaRsbAvT2sdJb>;QGZot}pJfX@7cC2BuzR;_w#nQC_a0j0;&>ps$$X-3Ejf9L
ztg-q=<|Y2H{fwEysd3zTSz4Rh@*dE3gu@9;@zqIH<T+@HR|Pz5HWCvXYJF2zNuzE)
z#v$rO7ZoLn3CHw0x;P5tl!?ZzGo<b_2N^tQI>o?;h34T=&9uj*go`ja9BM(-<Tj9I
z+kREsLsPg=y=VA_&d8peviI=<h<%hz`DVzs5lYGa3H&kA5h?}9V*BQ;e1^B8Qpk&t
z`Ab_cYa<*CW2u|+KnGBToHM7@51Q*T7mpk0^{6t1s>F<LX+iq?oF8@Sd(mAV#Vgxo
z+#^(Uw&cO=>cg}wxHpDMN{&`K7m<<j4rNpQn6#<a%PvMg8Lp5@j?uHy=dS%IaiwD|
zKM#$v9HPx4)XkB(Pe1M*Xc-r$W;b<j(=s^^hLk6HX;Vj45Z*D*0%)W>WlGm2+B`&+
zNSVtFk&$bfUmSk(Iw}%({{=Fdx`wUL&gwH2*i#-V-6kVPw2#ARn;e!Srp2#smZ!0;
zC&GOm^?~On&Ugp%>t?bn3GoixrDYNBP2nY(r$7i+q})|p0P!7%4&Cu`#VY4~hm_;x
zmpcpit-f8t@0-vTFroffhhURKazxRD4TcKePUtu^A)U0FOM<|JrkVSz2}wc|D$&Y!
z3I`MF>)=O!J0V4gLS<fuNsj=9KI?(6DD+oNh#i_xzIeXdZ7`uRMgjM4C$t!v&@;Ki
zZ?A$0J(V*ln03oGL0cAUS7&k%Y*_+F^DGoP3Q<TqiG>*lD5Py4etEVn`>Q5&ofk~#
zeS*yj<N-P2*4WK<u*#UhLyFLZPCmNH><=cS;~0Krwh4_u6q;PSL;pPZe99@X;9Ck6
z(E|$M84kBw0}7RTnbgcip}%ZGBxpi|quY(q;IrV6z|wCgbQzjZ*o#BGN5O>Ny)fyR
zbwb>LLPiOwV@L%#;)B?&eX~&L07N0JSeB)?0EKj2!+mF?&|fy8-B$;_12sy7PR5Vk
z@02jVPkN3ac1yqNig<;~F|Vn#c;ezS#XLDq?5a&I>J}@u81+w|P+ptLziwE+HKf*$
z&&AHjswbk|l+!HSG#R&>Y9wE=0V_4R7O@W^lgGJ*53=sJeq_LT1&`Xah9<8ItcVUB
z$-z!EbQSm_%klz~PopY_)bKfNIJY6q)*kb6p_nq!J}h?1i_|n=H>R?lug>BmTUwOc
zh-^5oRt4CL_x>>2IJg1~Rca={Mj2{96BsJo6Hh`AmB)5)l^G;t-6AE1X`iWl()DZ#
zcjZF{pXgY}HvH}sz2O_L@jIkYDF#dMKD9WdwJXQl1nkdi5}k2vQ+ImMp+dI$_5Dp?
zX$OC6X-KtP+->xPGBPXlRTp1p8~V18Z9ZpmbF8g(xXA)Pk(gBU_EV~_S3R(Ff227i
zq4Rn$o0KVy(64W93y|a&eOcS?a^5?z{iGd|CY{$A78YO6%qUtyQK%|(rU{Yh51JHx
zTzv_6ivb7XdCN|^q?{P$V&Uo+UBf$yS_x<3a%z(Y^AmF2H)Mr6yscA4E*&7c5^5if
z`?}2U<eogDqJo$-f4La85BKKeuF08CWnzQP5DOe;Dpq!^u{aw-l;2<=Z1+?e6P&)~
zmb7vKmz7fLO3RiS+|=0F3q@9SBGva3GBQYBs)&39Yf6-kz;4wCO{H<fY$rrDrnH}h
zRj4efrihh-K|J0AbGHq91-FR;0_f;VbyZXkrzJHhDj?KYWHA15j}C1Y9qYv47om8y
z$Hj7tIIX3ebU$u<zTs>UTDsQ4gP=!_s}<<6tdQ(0sj?Cz#F%_UJ$wsoiT*9z7Am*3
zLo86`j5tYQfdEE`+e$qVD2-|sd^$ED<Z;J!pLy@`{b#d<V;VQFd}=Jhx;lW2z7sNd
zB=?MKcF1!~WF+pnnz1rfVjr9YcCo&*q>F-hXw+dWW(gcfE3v327fIPM(8b#f1(>#x
zu6xgW|F|wr92spr`6-0dzJ$%K&tS+TCSD)iHy3HBJ(YBM4y(|}HX3M_DFv4DddmGG
zs-?UJZEX7j6LEo&-pHaA3GdPVLL@;uEGhO9a-WFEn1ge#Cw@zhusP-~iAf_s()!bV
z>6m%vgDhQwNjv<4bgIOzNvCISmy;uARy!`DfxgXL0Or1Zp?{jXm|<v)T^C~7kUT9z
zL9$6b(jsFu^(KRL50Pa$#!AWImc;p)sn!d$vUBclk`Ab_WtclZS!>vG>LgL;049(i
zO)hB^$lqtWW-h1Mr^7UD<?#SuyKR<oxTTbB-OK>C6<_U>G3sn(5*Xv1o;28g1IKQW
z)C44TvDJ$k)TwYRTMFR0W9-VX{g5R;kO)ZQlQUc!db%>Iu{0;f8^>63Iv3KSpHVu8
zE`WBuTD<ZU*m_kK%p%<SjiIP9r&*r$o7DnT23W|YdZx(&w|@@}A~^x;si})~Ub-D<
z+Znz>FJKg(DrbHz3Zt+v0?_2;v*Fn;RTac}pS-IygnJ4H#w^_Wjz-Xe-Jp|Cqh&69
zVDR~$!R|$W0O2z$ecn(Y{DY%+>sW;}bSo^b3=p?xuC#Q$(&wiaCYiU@&(}Nfpsr3U
z)p?R}2C`jB7%W;3b<Z)MP9-wZX_$<hl6oDhr-QfjLv-5qUVpnmI=|~u?!yaY>sXRU
z6|R0z51(4^NYN!Jq#{&Nu~p)*WAXU&*thaXX(fJ%u!E0!bw|TUoDR3gyU6v#hD^)e
zoE+zP(t{!5d8_5Ip|YXT&8=nj2z-Jrm(EpSo@F(z4AB<M5e*isqO*nuQqQd1iIF1$
z#uz1pAUEzYLYKKC389uL`5bsBERJ|5ad?GfvurY30?$OLWn-;HPE1kv2mQf3ZX{cV
zj773M&PF>UnIwYZS+DwR39)!wu&*a)qNz32`B5)(kR_yVLnG8D#<O+=xDDsLnCk9u
zLaIs$64G=Yxih+%h2BF-3?K+jSUmp_+i3Es9<Vu|T@#scAVKd^k(XCf$R|hF5+CW*
ze-;^p)>$nzl1sYLhLdcSO^Qr<$Y8>|M1)Y{rRCmfdyP~)Phje{x%J?XD?U5fE-2ae
zYV@s+)uv*e2u`!s!JsBnsw!M|(GP5@Yo+%!cvFK}g;<BR1qv-jHIQxTI$CCXJ@L=1
zh;^63Lv-E85+t5hpJK-sPTm-~p&Ws;Y<tSZzI(i$-J?A5X)S?wv44taQEc<8?w<Pi
zMCp3vgv2AbdYAF1J(f0+q`@75aT)T+1=X&p^Ws}le0oGrK6=vSZ;G6^j@NZ4n3t%o
zC(_qL%GEqcj+ChC(o-VdPU!Bocd6>~km?R-2`IwX*xWv1jV?WH@?kWymVJlhd-<v%
zXB@k=WxemC8mn<&SnJDn+g%8z%epee7c_i_be@1L3Qw=Mnd(FhlFD#4{dPUd6V+YC
zVQj?*vEjWPr#pD&5zR6Tlckjf)=6^pH9lI$D)eks>ljUdkMaNkwtZE`OBK7h^aYwK
zcr^w5%215LMxABo{A;+}(Oi<tP(YULv->NLo_ToY^j(SD)oE8qy%nn_C)RAl#kD_8
zZQ!|jOs1kaTk-V6IZ-%?r;&!YHm22Eil7VW&Bh4LcNnu+wpT=(s5dekV7zgDTw<jt
zd;4fYYxY!`5q>St*3#vNlDonJV=}e`G-g{?8mz;|pasXCGMo^17JuTVpdauwD@S{E
zcvzriMgV)Cb?d^8>K!^C-;ZkNd%4zZ3x9odVy9?K_Wd#Sh!<?#7&bm-8@t%(C78a~
zjXG%uTdU0xA`66wN^|dYE2)Z#y<{*`Xt1m-uHO+6?(&9ZVSMt#u?WHr@_i+QezkFq
zC==WDC8Rxl!F0+Kq^g7riAwoEGWxb=eJif{URc7*an0&0-hui|eQq=c+X-on^!M%5
zfB^t8EwPbNI{Ge4`CXoqJ)QF=REbNAqt|B0@{t8js#{kM9K*AZ1-g9-AUU`ov&+Xc
z*=|l)qKP~>9V?;9mdSX$OKWdY<Cy=5UgBA%iaH(S6|JBv;?KI;nGm^eNTDWAQ{_An
zsX?84Q_Ynyq3sQbiTZ4R6ZB(KWkPybxVyFR$9Zgn>8A>@dTwTIb?Ws*-82DwUr|nT
zY;^i=!J>wW+Y^ca-Z1Wryy}_f9?wKH6XKK5AMwh?bst*y*7s#J`+K^AseVD1iR~hx
zs<$@;>onxXu2&F>id@r#E!yKr!LD=Y5?O%eZF$Upnxc6H7v3b?Z(WW_Hw#>VtACVA
zQt&f+KGCE|NM&&~PF)j!{o$P$0Z}^zrm8v)d~<9vQ(+Pzrp6X&=Z+>kZ4ekMb?a#{
zN6am~n}E=p_~hlKr{}U8&#L}vy=l3XpdEoYnr=<Z5$QV0cG=SH`MX@)oi*>gSC6-&
zeM8h6$s5Lm?e^H;Wg^Wd7+!I=Mz?H47N$2v4>2;0m5chZ-xAv{?wX242gfCSa5J4O
z!<+DoY@Uqv#Lwj^2Nt=Nd_%8bJ)$C%+$Jkd7A+J;y=kbAX+_^?7wsHyk>eE&yN_Hu
znX}xEV&h~uvVdxB*Kt8oK>RWr4_l4$J>84&SF}$(c+}dgfH<+YTQr7Kw2bGmntEYZ
z^fA?CDajI(>33@PR^BykEy>HW7<?VY(`1{!yF$tZ*~{Qe(&@(*NpI2}R5|KpD*jH>
zDIe!4j@`g{`mW(+#=A*&N)j)}n-t$xZtPAo)Dw}<X+Ln<&jmqfNE+jAlqim7EPk9H
zFY)^QRrcT|c6)2mGz7N12{FmiKCsI#s7|G_KtafXOTD#Oudr^DMhCkIa8QS*w&**;
z3g9Z>-o37wG@R-Pso(aEYOx)Wt)<as%+^B}gzAgB)?;{a=zAkW-t9ZXJrV2`+9ma&
z!gV_C2|5}BC(|Ak)ek*nA}>fTSWggSGYV<2ejCrlo?0Gah}qDB_DY{~5Oa0c__2lI
z;S%NcuK4ICMK5~h9JjaP^=_9*F<VwGDdw5vQ9Uh_dxLIg^1K8@XA@6<Rm-UOX3c`8
zE)}+qk`C_3^<#R?vU@dFzm;QSZs=j$yz^p^(3>tUj2|u#`yfR-cMVW$V^eco*T6QO
zSH@a>Qs@*tb)Cm|9WC|_T>DzMTlvwxx-2c!L?~&B{mvSLtL!F>Q7r21vJ*$@;*%?_
zQg3%}>^qB4dscpJznYKh)LhK|QZl2e(82nZXe0uEXDQC|u^-cg?E!8deC>s8{NHu%
z4D;PLh*1zDq;r@rz_eBC<q$mSx;53Jw9S2rhQapqx79p+IQG0XV+(GcHvb&<-l7JZ
z$^ijPPaso%QfJo|%pO&b24<^Z&ZB(Qtjmwi`Os(+9u}k~R39eXI{9L1dnmgJwxsZ!
zOU;$40#@OpS4Azx6eC5duRLt1!5J*NQD1NIIKJEWDp1oK&I)w)-;$t<c4->t;ESEY
z0Uv=&GaC*Nsw@K{`;?Q9<|RZg+i4=Fv_wH|7jGa^c;JSj+=1kfu^@-^YAzRr<1P0@
zjt?keNI6dhgtGK%$*o#Zw>3kt(R!nV(s2x{XFVB>$s)^&FlZ|fWGp5^R=I0R#9G<S
zy@qvF$rq|k+w#C&Rqc>k<a(!zB#CY-VXwnAd(pPUqLq_wmKkT2hAu?44@ZtlKC5>M
z4?h$dJ=`whb-Lzl>ji!P$$+Rc=?_}jc1Mm6X`~kfFs7Z)uue!|%4Ydw-1rck8S^eZ
zw;0Eco@6(P?v7%-W5&o5n*FkCTWX)8W#3t%eMHbx0R{_!H!3B(+6?6}!8r7A`*4JB
zUiLdQvEHukdcOAMoO}0?{468$X<&1|8t@OIcRx#`I8(=XAzqQn#!6SeAQx><-x826
zNTyeP6jrYVHih^wL^UG~>nRc4uGilq9kV5vbLZW9z0t7(C2Y=liJGVojY+w&^vem+
zCTgO0FEK6hFE_c@s8>0r&=9hV`R?6ggy!R1`UfTQSguYq(LF1$BDo)atXC;&JK_R>
z%l#L47+Q+}%bXUsx|tFYJ>VsizT0f{C1dLRv{N017_o_k7}x8o;$_g*!Y}G;R}IL^
z5hD|pHB*D2b39}LCS_!OPqR_7%OL#MwX1zH!M9sEQMI1`#Wu1PqX);W(0;$0X8D@y
zA=DK9$)w-`6+j-i1++r+?Vwi_(DYmmLc+Tx+8g*~@gK30ww1j_$JQru<yinwzF00B
zcHC7~0si{b!6!^H3FVByBp}V+;vAKM(@=ED=K>5)&jSt0uPu0<qI0ggLK;*tn-9Tt
z9&#V<y%eFjv>XKxh}C7!qu}hTS{^$=q=wu5()M^m+>%OSmX_(I^K6mp0E9Pqw+Tjj
ziD2TF+5GNpfgFdesvMl4%P2zgE+nVGqb3v{5fn5bC^lRblp++9b)FP)1liC&7D&=M
z88D!`X;|cTzfs6`q>>0?tm8!yyOj=)K&{gml0yhl$|>_>4=<Uiwx3Vp{dW910?zH@
zsWdmQUI$LI^Y;ow;H8;CQ$D#o?<`~`uR43|R4eDIG*~?TI@TtrsOrvYi@{;B+#J`A
zbx3nm3mv`&XMK1ozNzb6%UP`9!6V&>a*GIeq(~1+xYZpo)rhLD?Fv(1oSJ-`&G&u*
zKF3`|qR{J-jXBWawch-?v^Bp}0NlG#s9`TP0KVTN5)5h|O%xH4Ad|iHaFe%4cLzMN
z$sl&I#dmkC!C+^9Q{%$5!R_g48ob4+XD?gzY(j=8BM5yv@l`11PxAJ6fOyzfdR~K_
zTt~2~x1>+T)B!)OIG`m*JgX3pEM3!e<s@;^?bP=Ej4o+pbN#i$IUpiJ>TU~+t9(u7
z9D8BWrHb@R*D)0rCsmT?1B^zx=?4X!+?qM?^6PMt4*IP<J92tFcMQ4ZL`k#vWX71Z
z9{8Zk77$nU1_0!G{Khmx94~I7ge*{-m_UNSY^Bhc&2`cTeOz{`)YC4H8^AX+thC;T
zpO+wFUddxubi72;A)6$DF?N=CFORR#SvX7#!r62jXirMC3eFPfiD7s4(G2223QgWx
zQ{pm|HmP$WTs_B&X?YHMRf5EgL8l4rg25H6LWS~2@?j$>3uJMHK^ZGmU+ks7uCCH|
zT@&~E!&9ZALD4FU8*T!Gn54o(mSt{4U-u^#4SF~}rM<^P@{lVV9X3@JBByNTRv$_(
zl5)xGx>iPH&dz1foDyEwBpb~XVU+!*#7j>+FRI>PloH%{hYk-Btnorh7}3RCGOW8+
z!d;K(&Wg}!5`M;@P6#npuVTLu{Lc5fZ}w)p;G{4nqJ2+wRYE|BM6seo9ZSl@NDyf>
zL}1e$Y!W_)DPM4v5QA|Hp>R?o%CNO3TH>P?o|$L=*rXjE*BF>={Wc&QGj$2cs~h5k
z`f#~OVj}oj>oR|!P10gnnyH8fXcOR~F}`W0WvFDA9XKXO<Yq=lj8AE|97x^WWsu4g
zOLj*}b|c0n@)fHewl-qJb+i)F>UGT6lpP4EmG(opxYlj~I}go4b`$p#4z5D@<~w|~
zr$z7!bb9BKSJ$TJ)Ow*4NkLnPRv3x=;^AtG$vVYGvq`V<j1tAS8Z8pVgITWn*5OYq
z3C+ZY>}OrVB1@CwGJ@D!(Zx!(o9bRzu?0%6nraaCGHA;Pi%ec?)P1URNWE1mMa9Y_
z=&3mQR2j1%gUwPxVsupXULVV%Lk<aAiSCb&mvi0T+DMh0IUYMfY)j6`Vf8v_pJh6r
zZN`@{Av>*5gM}DevwtDsP(aYe>zb9<x<kc3IX<jqPtkVJ*j6@Cg<`y`j=qOOd#hzP
zb4iGnbtm6hYV3PRHzS#;pw6aPJ`Qp3_ET;(V%;nDR~GNl*G<`ieC5p+@Abr!T?mON
z6ZC{Ga-(#HwMA7ChEReu@n#)1eA6v5Rh)c7XJ@5tZIQ2Ny<Yg1duGxEzK`Z?*I&I|
z!ggmwX>XwKg^tx7yQ^L_?>HhyoQ}kR5baaQ0*Jh4qOvfH630Q{_iBYm=e|h`(YI^k
z+NxDdejc5D9q7P79Rh&_g1H6PcT{N$ik9V!s0sQfH7k~UIE}Q#h}2x^ei<Pvh!qK#
zR94$<UQt4CT{95wQefYB;%bvc6{1eZG$=jnrd56bRu4y)br_3oww;o!V2>+e?j3(K
zmcMU;4>&5xc2hd}^_}5{_suePkX0~>$H_8mw;e=<@%xk|THd_|L81$42A5%pTMQ)5
zVCSbLRF&hs_38ufz>J%E{yo^?N2G<Rh$57Dyklzw5?f-IFIdmRM_!?NSFng&y0)#G
zRkch-sotkDpLNBF8efEDr$%^xPESu=&FBFPS84r2;X*HrN!+cGljO~{J<+FfNv-h<
zJ-YQ=>R5NZR#6o4IwjdntjiI8uEwOJZjC-ys}ez|Wmy*8ej>`(-wYGzDU{q$l0hg#
z7u3y-pj&8j8ffEZoc!`MZM^e5ut1-&h1?2ASo*Y2sil(MDX`16G@1{-JX7>ySb~5x
z!;ls%4U9{4Z&gOHpFF;>wWsNrnzAIet{KNP;*gIE^wmpWE1U<{0(9smniU#H8w@%q
z?5Q}eY4^khi8smc8P++GqO}FHwG=oH(P9<#ZsBxu#Dz1)NH~dn`E+(<Q2N~>US_@$
z_D$r06Wg20y3?XUjiS$*EvRLe6v*fHXAL$LA%*J@rLa79`|;`PFcKP?<&C}JHqp`H
z(U%6JR32<pStPYjj<|xO49@KD2YG(36@^bYsHXbm!mSVQhv&p74mq1@XoAQ`jBk3C
z7fz<#Oe5;k$6CZD^<=ECgZtZP{f4))JmjL9ZhE{`_d9I;*>I)_%!eE4);Xz6PsY*j
z`jBzrfm!G<l1@BhM5Fz+oa~rKiur;L8kz{q6wddW6y_3%Fl6MJuBKfbe%H!Z!ZT`F
zPew5^H0_LXb5|ESC<-Oy){NVPE1aybPsFw)uJ4geS%MapsQ1$EP}m#F$UaQofl2CL
zJ2X_5#}i8z9fHj8=jH(py^ZFsyh}9K){AjjFe~RvT8xB61ri-Q<2Muf8PBnqOktd<
zT?TVUTru3KXUQiuWMl|6<!cG<NO%0l#$<ELyy57a*z6#JAr4D;ZE?Lew-vj^-{$m!
zkZ%1kW3BM39*u*mZ<2b^QwMQ6Ha4b!c?u8lE5pol{7h5q(Y+1SwBOwWbO&Q1HMid0
zk40VtY}ov;E$MQJAaKM(lMNogl9C4a?d^HjxfI98<Nm^}QTLH5GAkyp0rl9vGYZQk
z$W!+4K2lrV+lGSXfM(+d`rLaOXzlcrxz1^}%3}*_=xhGhuD<TKS6=WAY$=aG!Lv#R
zLqxr3UrHTf8#KgrpI~heJ>8D6#I2!P#f4lvGS+zsbks%{A(lYy7hNNN016o|x?KVf
zz7;5@&FNf=DK!loSLjURBXM~HzMa8fT~)UX>uBq`3dQZD%rYrR>Ws`aC;)79REo_T
zfW&?JEn&1~%k+VwSjfw~kAi#;_<_4+_)42QyI6&qGo3H9Q)V-l{?l>}(L#kQ7%IQ+
zd<-GY#RYA$%kSPt?u>P1p*8z5)ZPx=b*>aIMu4RRYxpHnKqa7`6u?6xk-f~+h0Co1
z&93bo(cTHI)a#ZXywY>fN;5?lQ<Fa5OIm*k-|k)nop5d59?GZ$l7{!*eA1_F!o^2`
z9_QFD+@q9qJlF^Bwx4KAE4dDMS8s0~rD_3S-jRjNf?LA!B&g(JQ31{5p1)VUw+KYW
z1+}E8ZvGs)<q%Z=_>NBy>fOVBA-K!lxqe9Vm<pnM_sd}z&Ta(6buD+KBsb=GYzE<V
z{KWuQPC>Bq=j98h0sT`zkL9}S&r^XqGZfM{3~81+3_|*D3~Qum{PBJZD9Q0#+e1Z_
zN4J7zs}@}^ImZd2@kgCTL}|_Tp8&e$sn4_5l)d_1zSqMQ@bD*<W_Buhytjf*Ht+p}
zI|`-QIe9nI66YQZA(QTCvwAJmyUZtW7dI}X{B$wM=$=zEV*U=Zfs}>j)&o*2qakK%
zX=$FF*|Ndi8##Fg12%!rN8UwL5&VYPJRxQ~^KyqQ8{m@<&+Vxxjj#8>hzaZgsWa#%
z2{`S$cE8#~WmOrDul4|J6@KAG*Uk3O9w;j=n0;y~(A2WB58~Lt=i}=|^JbTWzjSJJ
zkce_{B>3+KOt(!xeC4d1yHx|+eWg>om3SFsQM8m=;eg^-oVycjDBWDAtgVndEJB39
zzQ~tCl@9JIcGqk-MS@Q_XAIW*l~Bn*yteRV=Qc>94+gZ;&noY)SixP}D&d0&5c0dO
zej^<o^GkVWSUAio#57Ewu=%KfM^$S|a!ZWHZcVH`rN{}|-2Ddu2Oo;bJppf_y@y#v
zG|7{JNw((#GwmX9*4}{_)v75Wqxmowo&_S#d}0r^2nTfZKpS+_B0VRZD%~<Qfagn7
z-W5&ProGey4X_doFi>9&EZ2(}Su~EL+kpfN<d$*MgzF~o6=U~w_fHS;Tm|&t8<Q6R
zi#!TEklF6IoJOH#2SUK4I&KFqqAI_uR^YyUf;1kkOrf`hRI8ki3EaUTPzH&5vfE)c
zUw9K#CB+(*Pt)hCFcip{Z2*t-p4cG|8(=w*)!V&dYbXtu&!s=c0aS;l9MYRcsYgda
zouS@5+M<A99|g0zdu-1IngzcEB3*}{3kWQtv7pS|tzd|mJn0QqbzI%khPK6X=-z+=
z2cOBc{Iax1cHRdqmh+EH7YSaq24kG#<tu|@EOJ1-56ew=u#lb=6!~$!u!DDVA!xs!
zYs(F|AcX$VW)PEHffR?`jYy!Y<&3wjUI4FSnHzZIL&EYc@P2NFkjQexlL+^b7Pc~M
zkf7;`ruKJxKDC8nwFWvWEJs@Wd5ag!b8We>(VcH1rK2igW0UCyxc@ZJ{|n=Xt9Q`)
zXOZJ5&LxR=#u_%~`{_NuXIYz48Kt^IqAl3Y;+C3v@1)gW7fC%vJ?=%Ox}KXuXX!&9
z9oOC$>Zw-s!F#Y%6POrHeE3jtM5w#jI;%AOyF*Jwo93KEWes#?_7&r^25Sm8xb>^l
zqH-p>vd6klc1>)Mbgt%3tur8WJ@%0tC|C8!)~nd3)oh-SRp~s?acfBAv0khWA%&bX
zHHj`$5>XrP)x%C0*Jn8NwU*^zv9+#!L~<9tBxR}e_-iIIE&t7N74i*?Vz=Po-1Qkc
zo2A7p)$3il5*sW`h=Inu*~9m*tU=_}q@sGflo*TL$H*cl9V*l;W8zXC7l%l=lysEz
zcLkxcO&4r*eN^w%7BcsRfM2~`i6r{<-UUumsdV@Eo}JI#H&LaBtzTDlbC+tYN|=1Z
zi4E9sn;uEBY}x9Ga2<msYXu*H`BpH|g*vt3jNP#Zm!4Z5ysL{a(i)V-(?$$xThjoj
zhJLzeN41)Uj&WaGNve9BmS&Fo1ZU-ro<$!w)AnV*Jdi<w!?`jv)o_;uT2N9am&EbW
zv7VebRAV;!1*=B&BmHNwM?$4ob4jKVA;^8_7e}2^OT#~7mZ;vHkZHget*&kJ;>mkL
zcScNVS*-k(wc+(9E4M2?YPo&VDynDqPE}F#P(8+_#CU;8jZHXT0qTg<$OAIisfHYR
zG0_|M(0TRfTFXkj$q~>+C+B(|LYKfgar;Q6J9|j#C-l9Zr0ls!ldm^`)qT2a_ylOe
zvT!elX)f;K<Z$y+^M=fHwsT5>f~J>1nuc6YNY$|;3JF`*2%ZF=^A>dm5d($#(1B5Y
zh>|^Jy6z*!ov-gbThTbw_(nw@TNN}J<(@ni=g?U>_+;MF<CzJ3hdl1K-m~WU;ETs0
zOlsOALalhS`mYalFIQOqmOQ`D&y0D)p6b@(kWTV*LT5aoi_nvepSN_+Yo4?wPfkC)
zy3i)}isLUM!UFTzF~K_6iGj*27feqhzDK2uUyC8gZ_lTT7_M7zA#^XK2By{u%%iRB
z<z5c3j&N)de}47F<y$hte9daw)tvH4{q1FO#)Y?2-pH>btwUulKcPgZoWFQ=6B%TF
z@JocNpGf#d)~g>j6TcpHS5d#T#yo*68IIMs8@{X~7GL~06gx>`IW&?lctpH?t4nu!
zTV~>dMaA|;wl|c76B7;do%dE2>`v(N(lT?mZf%SSmmt~49-Gj!e<PRDlRd_>qK<pX
zKEv|947To1g}kZ0>=HF)N2If=b&xd^5gg<XVSa;^o<CSqR4!?xEwsl=MbB;ER_YDK
zw>P}!erVtpA{QNd@*q6yMvVjO1MU-#o-oyRlt&r%WDVUJ@_rhP${xwfo|+u<_N2d+
z^U&I?>4`^!74OtzS7}kgP=ia>N8-i{SjE1xOQk;2rQZb!#nojGC&YzE8mA_!)_I~m
zcBMtH4>n%lSR)o;z|fp`r4N0PJ7$+?-@Jp8F}<d-+*(N|x+>7*8qyKz2_qm{1CON&
z8)4AB7#Kf;M+X+emdSg^)$%6Ad{#AX)fe-U&K`d2%W2h}RHn@8nBj`8wM<RsPO09^
z7wNsoB5R<sfhQ(jhv3|^Yq(U>bg5+jRWiOrO3nRng&-@BTki`tR&37#ljuEBAC>Oi
z%D!h9n2<dhJrq02xHi~Sp?;gcwt;Gda`O2l>zpS(bSEcQYMEbaQfwq{xZvwxl`}Ee
z5T6)=X>h`=a<>lG!9)*7xx70rr)n7QdL-D_+jqTKDT`<+j0GkkTc@`V37bc;g<JVW
zRS)1NM>}iRn#aDamu%Ijh;==VSP<^5;1J>OYZRZ_;Ovt*+2XI~d=KZD5FJP-PPQ%U
zRn7r|?62z_rQvZ5;~NKL7TpmQ;f#rFSrN?cP_Cl;u&_&Zf%N2X80LY1OFj;L=G6;S
zv{Xi|6+v@w!u&8^#X7v{<J$`x2RHO#V%1t|V>EaZ45FgX|9Gl|fy$HQms|V<*{k+6
zUq*PZ3XXCP_jNPFR7R?Vtx_4>ZB!h<|E!}TCbgPzz3t0Q4aVbY>=os{2=;+;6@_~C
z!KSlIo#zI1Zz|j-+SjA&LQbx-05v4Nd3SwA01x%ZdE(jhT;Cvxfoe8J$2Wy=o<eSH
z)K<3+cOkKqMwu#;Yg1DJt@H0VzFWk5uCLZtqWgnd`EgMN*enkN#Qy@j1UHwaUEgyo
z8){M<@@nTn2r--#=W!e(r@AOCOg?+)o>C$@cm#7XYV;vpd{_9`tK(V6{F^$J6AP65
z`^|^c1|<zh62<+QvI6W3YNuLz6rm~m8Jmd&paa?)f%Y`&;YlsP1e<bkN_~9F#Oo-x
za-42a&6;qh4+qRcrQD*&w~eR?fh_n^Q{9?f5rz*#-3D<&R&~`b8iYv}gAQL2idO=3
z>T?Lk&8X(%Sm2QcwDh)H_2Tv)PKAg&JYIQZ&l~lVmO?!V$dWsj$z~OgCnv^ShVHb~
zve>s@G*pwD!y+0bJrL@)xU}c6{&l*<E_Tgm>nQ8P62;}`mu~D<%P3zhq%Oo@GHUgj
z{p2JwB1eyv?X}9Qd;2apd(AIz19k=_WLD0{&a0;SGV)1+CLY;fQ#v}(=`s`>^+-ld
zDHrFx?D*uEM0Xl6I9+@wg*$X&Qvs)WUa)=AS=Rs)zZ5`@TX{HLnA<ng*)SJ-9~>TP
z#JXV4a>vDGek$0W;2P8|raE^H`!-^7i+~h^S50C#hoZt^e2^zX?fLOX552<}StexZ
zChanc^e}PlpQdDMmWb3f)z=Ce7qFmqtfe^A-viL|JAr0Py(8(uqt;C_2^|2OmO8>J
zwnHh<2w0_8?T8m^_0ZPp6?2m8lvxEFE}R5_$9ApZ;IqC~VqZg*O-VzROWOpyE{wrW
zgUUELbIH|9X#8{zD#sJBeps>*JKme}&U<$au*-*Pd!@6ucZ&u|W3!1kM1Jih^#hPf
zwEWnV+rWRke%~~E8UYGzGi1lXp)`6>N5QmE?LmIh%FcwG$xq#@cPo^!BgF?vclVw>
zN~l`T#&zP<?zp65krN+nU~C!(u&HTQJ4}ZHFeUX?hPy7b`!XEzK+EuJah->8z84S*
zR;!n6u=C-O1g7!O_#qC)hER%Ud>MRuF1;r}o(#9%iP3sIa1M;P`S!Jg({Ohifc?8?
z_+`g|dcOa7#bwyjTy`-VJknovaXB0gSq|{B_r+J!uj_c61C!e3>VexpqbwIE2q5uN
z14xkj!4V01IJ)Z&%v}2+(iyD5(bL*<99;pB;B~&TMYIby58s8b5-{}DAKyNUJDF<5
zswTA2v1u;g+UBlD&zSLrJcL;7XJL9Uq2_!UAONDs!ovrc%)Io$rHUxk{6Had<VLst
zYssc%dE1qPPfJhq-+=JYS`jO4M+I8YjWSmoK)_2Uk8Ov&z*`K;qHpO5?cqUPCDKz#
zpre{D5<U*#-pVkAr39ev=hD^%J%0DlTu@QJ8g?BV=VGfgTP{7W<?20LLQnIo=6HAl
z8Z*9Yrsd(5UG$L|>aHgz=5K(Ykye)TK&)L`v9Rs?muZVb;b0$F(#^fV0bBgR9(J!D
zvPEWgUam(z)SKr_<8t7PLN8OLvKnT>b4WeYExwN5;2gWt`)hTUw^KLs(l&AqT|3zO
z!zEe>RVo>e@iz5{8>VV@U>WC3^M?nRD;<CDGV0c}EI(bAqva$Xu0e0hGJN`_=laGb
zn44udMgi^K=xvdwz{{%!a~Vv?HF4Jr=7w)r;4#!N)&L3wA04|48-7~=J`2?@w^7Nm
z6BIscE=p|Y{K|3N45E>%dKnC;g)Z$N-gS6rG~alT-ehK|HyyBf3uj&*b<sD0TN!(<
z+WuB5=(E8-6NbCXAy7C>$G-<!uZKNg+lMX_%>phUK|do)rVaSpmbKZ5N|Zx8flSI;
zWE4QP$0+e>b#f6rE$8a7n!@xl8Q_`HtVtW3WUl7S2yHx|NV<3L9xC;=0Ufb>ldw(;
zH*kYR*x`+kc@Cc5oGr7M5}4(v2hX(YPNfyyY2xD9{X!IW$TfhFXuojWK=J53R)7`X
zS{4-l2~eak$d?Un1-;d`T#w^C#f7GM`B-h_5$jd{w+w<|ricNAi8t6oi$^R1;~%=H
zq~QeC)S{H{Lu<9n28?)-ciA%vgCBs1nNcOpgWn<QGb7gh5-PgT;J-tQb)N&{Z+jjd
zlL`9#6y7XHrQ))SP<Z<}+r_ukuqU;Ud!s*T<rkjqd!RXP$E+@)V#TAhbgfzg=+mjJ
zciPW_!YS{Vo`0mMxl)Lj-2;AwU5kp}JwA4MC51k7>HlLdSYiLiUNpP%AA7-I=pUlJ
zAoRR0E-^$+&x&CTS-D`aFceXm6|m?IP_v$F{!!G|Vz1f)J#}+xPJk4&&oW62?^Xv_
z=tgCS^oLI2lRK;o3e`?#(>Yf5sFEu&V>LS(vyL9?zfx6~i<}w=V{3@2&ulab`q;cf
zcsN~$X+wUFy6I`+R7H(c=w!wOe~*YnSaVqA{RU)Z^W=60tc3DLU_{Ey_|<8e?WG*h
zMr_=7sBgU9eNJb?RJ^v_!YTseTFtU^LaYZfW}6ozjGEM|d_38j(s5{@tB0Yls}eI>
z6VupmQsC9ep{e5S6M~h+?%~bSxg?gJ#uB#_)m+1B><zqSs(L;t8MWTBN71>wxUSd6
zTm^%W_%td1J164X*t&fTbE8<;aB%Co!GcY~1OB{{iML{^9#7Uve;~__nHFQvpTdG}
zD?SEgMTwE77f#%Z1HYjX(#8v-ByJzFR{y|z>ASBZTvP?MNP+%6BiY<N-Gb{z4MfH)
zMq_+p?@dl#FYZY@Wy(ruP2Q4a-QGkjPW8`kH0E`cEOkNGkDheI4n9r0Huy;d<AkkD
z9++c}Z}qKADxNPqk-A9Os+AzzQC3L4Kw4>W!6BnIg_}8LX@sP?<vKEQiFrb&EaB2%
zKU-C;PcteNk9IDtof{rv)mS|Gve?BPBSB<vY@8ZRt{-eLZD8iVl4|QhBwuvtjtR2N
zntxI&!oe?PKm*lNUm@(?Iwp)PxG^}McG99sJu9|v{titdYiEG(J*kP7B`MBv$+0$M
zb>a6f;v06XtI#cBMqw^7Ev~KhWWd%c4%LMjULXan-1uoycLLj%xB12r?Tv-pshL-T
zdhh5A$(P)xcX>FvaXHAiK4_;V-saLD0=7zh#dTB{Tnk`kGfG8|z*7_jZ@C|i?McDZ
z-VBvaMT9nF7iexER1BmT<Fm;3j+{?>aypnjb>Pb2Wv1Zn$)@!^BgCNY_6d?<sE+AU
zch_s=EF0<0$NG9fh#?J3S08to%4>O^^<!pvwdl7WvbydnEg%(lvNvZ*Or)aXw35vl
z5KML0(LJR#%u_?#DpPu`2CK%UyPA9CT|9IKnUg%&Hzu~k7<=MxX+*X#=$QFo(YGW&
z_Ijb+s(Mq74Xo`x-4-#}9NeRGCgn<MSXZe(^CM@WwY9qILf%c1o{x>0WUtqkNZC_b
ztA=^*W*+GIbdyVFJ2_?S-31ZtWx}=N28$-&C|{9N%5A7ZG~yjPlI@0`s-<e=q~}|?
zM;u|!x%o*7a36SA2u=Zh1W5ELzNL_gPFT2>MKr}47Ivdg)cTO0MU~XBCTvh|sT;C%
z%Sb(Jo>Z$cII=W>8zI`6IKrCJ8(2%Y8taq#Oc4`dsu6^(D_dHe=RWmV=TqsVZfJys
zi+{2%K1^po3*&+<Q1ziV26dncA`>2WvY%~fV8-|28&do3)DkPT9oTyu+ZR}g)LQlo
z_HF1)ZxKpy=(9rS2Y58Z<3Wvf38}r^yQ%AS3R^k{d7+Ep(|N-0;#Ou3P2nr&RvP;3
z*0J&Q%aW)RTt(hrR-L@>h(z~%^m?`4A%>|lhx&mUTd}tcc2I8um=pW{y*(AZ%2-1T
z!g!N~>&nC=D;qsoqh%{$YV25T{EK^oFC+YGZt7*9;1aF9->zF}-npC=1U#-u3X8sB
zy4ew5`jXwtq8ojtLYqsJ&>Pn<IC^d@ONwAs*Em@B(TLCza)W#e)y>Qj9P)@rtc-uP
zVQ>P~Os+FQ$BS-{Vm~brYnP@Wrg~3jB2Xzy%9Gw?9(p)5+3ZoW&PWHAd^W1&xMSVp
z#^YmllRcH4_ii^-+aK@i-%+bEKA}{<ofIpkN>0j8T|BvOSKkeK6Jy;u8Ssw220JPn
z*7A(Dqdd@#nuaW~{8$`jIjARH8|9ZAvoO^Dc%srX-@(BLDh*cr3`fNUqQ){gH&z_?
zIz)c=$?wo${on>vJ}Edor&<Ir_0cnqy|O*~g!OH9<&k`ev-3?Riywtf1z54|$%#nC
zD-lEeeBG2MMh+*u-utYEJMNX)I(o+9Vsd{hTiOzLD-Bea+2gd7PK^dv-zmM_yrN_1
z$_v6D(sel9xkS)GAAG%(w}|c@$4Y&68Fzv_ckC%yDQX~4#*xm3vG{>?ZhV3x=4uwA
zU8PJ?*GxPxa*H=(xmkRg)(igU#I-$UJ5n~9qLf^#<FS1wdPq|2-H(*kcE4vZDe)sz
zFx8YRI~y4$1f20bqMKA}F1Enr26nQj*x7kO&q<Pk(9nsTR0ngr^B`g~{#2@=dK~RY
zxR){^KIPCn7P?N4x5sZH`rcj;5-X7!f3<D!Wp!cZ&;+}K`w4mLRSi?<VffuVj-y{z
zXUVR(2;S5WG2W4qLyS2dRXFN^s^Z)o+d&t`!z!E6ujfBCHgv1qH^DDlvWMqoD2lPH
ze4O1x!(pV{Ebe&KZVo=oWN5r+>!yZ>V_0ISp+;6seeJ98cXJNc+Jv)fBP%S*s|&Nd
zZ-X~Gh8CEd%prH%F)-ZjL*v{=o}FYhTrX{YIaR_UM;O#XP31ek#rgVJl}6qeDk9w2
zh!1M)HYCJ17Vg%)TR0_@a8^Vz;hisn;S3|4L+_MAnY@eKN_$1MzE=%P@_q4FC-a}d
zX-rV<0HO`7bFUqtdbG=K%hY)y#A0<YsuDHz4wcpww|yRcke#Yf*L+4;tcSUtCDyyC
zjQ=1jYh>$WUqp<!7|H8!G}p)@!aSv+yHSUXt12hr@)%>B8joEZAV{=UN9U^RvKEwJ
z9qW_K71em8CnmS`tf{|L88;w9cuQ=~luBxjbL*SPJfk?|t%Dyj5q_hV>k5ofy=X$b
zZF`=gQ|UUx9CA<UDhH9iJ1W)@(x@_P1>VK$2Mcls`!-5DGSI7Q-b=~)IB>u`qedRa
zA*_b&8NH^u4;6)o6PHR7dO6Y2?NSK#=FC(5cUYU#;yXoy2F3&>nxmNYCt|%Pb-G1&
zi|^Vvjs5^h<cv10rq#`>Eqcx`+Shh3MBlu^vKbv3ce>1dq<}!LCsB~J&wRL3L9K3E
zc!*AhL{ZLySk4Pe)TjK0vb!gbm0cq?*1tKz-nh85Jm0%8Vg2w(7b-eIO(WVwkLXxC
zX6P1Ubw$1*#KhhFDBkIXE2p1A$!<mMQ&_@SkyLcr828Jx=1+@%>@Z)n1K#)c3{E_7
zA8~AFx?wk05a`O+r$_jx$Q}-hJp~RvQ)9peTKwzo9^g2(*hFYhp&A`@CW^tkdji1C
zz2!9b1J#qBOD_wZ>T>1wQkvIEE5zF53WoT2r5&yYDF9_J>Kic5w3AJs_iZ?~Z}@Lw
z3ptMdN3s8Sj1oNkPb&XY7yo_*d&U#7md<7w6#f=ne1m^FoGu3JOUv^YG&sw#3_~jy
zJGaM1Phq;UbKy-;bREl5&=0?Ebqz$}eTt6pEu#e#WH>$nvvo~x^L#jN{Q`;`hi`A3
zKCkRL6!YvFFh<iNwMRjv+U3yce9FJPj_Ste<IolXgMWCK4k1aS-3d^5eOd1xK!^`q
z&Ad6|2BWAO+m#LpN2c4g1-j3x0I7S;P`pV=3X})JBZ-G9cd*lrev`2Q&XRZYOAQ*f
z;n+@Rv$(iBYe-k(<CA%8%XL9rRGeYHFZ$D~9krnTN23>0vZwDd`EenSPn^8ZD(+a5
zk~@{DsK(%ZVNf{~sHsB5BxGkO;RLp|4f*$%i8h^?2bEah`-&w>25ZcDUHacBSc0ku
z{8%>um8jv||3N>s&elgHHR6slXOCWOH=)N0B(S3&`?*R*i@h`{eQ?kx(;XCH)I!BQ
z?Be#1KuuYNR*HowC{s&KuF{nVwW5_EgYyZt0yMSVF@uVx=6HNs2rww2?T$;=-LuRB
zUJ8u5ttFwTQ1{U&$&vRr20==-W@4xj1=W<vOf+?Y;)4{BN`5_t@5z~Jfa<w;)7C&q
zF1P+SVsD2!DhlIsCY^giOhVl%(KV(K!3_x=`qGzGmsLfoZ0W2^$BW1dk!4l+KBP)d
zjhC$zJl#Z02Bjel>-#MA=}n-zGn1}VK%N@qJ-z#D&0^xZz?xEOlFePrBYHJh8CxaT
zrgP;VL8jxWNH}~F*2Swb!DsOrdTtf9BQ2?*1gHk&*-zQE7A1m$pJKD}NR{Qml7rQT
zDft^*j_;8$%v)R*kB5q7n0PanIm)@y>hmDVho6ab8R~9ej+PDb6!0>{G`gh@MRkF9
z$giWpOG9(-(eWlnE-(>kbj3r3B9$qYCtW5-+TxT$#2eBY-N!6zZ40@{&3@7;S40}p
zj!My9N0{TW0w}6m3j?iSR$5DUkg<5Hxa+XB#M^T#yFe+%qkYU`Mo{6GWkvL@)P;Jk
zy>d-2K4J)_5hOV_9iuP>zZ5V4k*REa@QuPoN*x>6h-|h;yB<NQQj7O5uj!u9NN+z8
z);(7GMv(z&HrkeCT9%0dHHbAE9Gg76tCCJ0CKLsVpmdir&&NH<bm~)7*sgG8t=gOX
z!@OBWpenKt?Y;z*gxR;@e61QBs&sURBex{7X<K^?nSl4D%tj@OyEFTgCHi0Qi&F{`
z))mzReS06_0@Y4IeUoE7D1RNa=G7y8i)q3C{W2g<OXu?hPLAKTr#r}043fbg7!J)_
zz}!!_aViqo0$wA(%wiS-3IQ8J4EEN>6S_MX48)H`aVX~RNj_YZV`usF+@cgvIOH@G
z8e&_pE4)`5l)f~yu}1EqkEyfYs=8NmFhp+@!(dipimuc&A%UcE7rRZrpXnX@(u0F_
zb|p@Pa?cGFdq9m{R^kU9QF1u})E;CuU!RmTo6Is3FOB}FGO>Go9oTHPMcucNE#MTQ
z0>F40C|6KPm5fPBvQ@=Cj&<Fr^*Xr;PV4W?0^UeFu^mpW7VNwVPI4kdL*;(nTW*4K
zro%&3Ck&Sy83o0XiLY+#aG4qcwWqp?eHNO9O+AY{8*_3}?K(mA=|?&N@=jI0i12sx
zj1O2=jSvdQCEna-3EmZa>~%!FuDiowX;IZp=RjfH-Oa&$ex~jpa~Ghy67%OX7mp{J
zY~-$}8z2|tYNF3z21zC^x$n|!nk_;cDm7PK+a(&~iX!b>DrqzZYC#Qpm{;>XIlp+#
zyEr*eLFPYVxdAQ%{RFvZ*d?ps!k?Q*p`t{dQa#J$icA^#{B09Wv4{P6Sm)weV+}nS
zGm|8^Peb*My98BFwKNt!Y`#(!ab(C>Ek3<9Fa=aOxqvrw!E0v7C~LEWxwxm>8T3gC
z@<4|?TPmHTBuskF6vrad9uVG*F2i^1{s5v1E})JtBGpb2oYzp9WUAUD!AMA&M=2A_
zK^*ZP?EzJ?g%3WRMbu}7*IU+*!X?@SG34ikRyB#3=gSqU1NaRs@I>L0t*K6wH+jL2
zf-g@o;5aJl@qod^{lG&){_&ls4k`%QZL18?=02Iy@2hGy6xev2OGY9b%ztf}%7OZz
z@T;AdcHD_{l*jn*x~X-{I)$84TT_HXWEI?XFJvy}$?8AWS(OlqX{*lYj5i0`Iofo`
z2_8_k=PCh;CAw~D=q65$w@I^3M2dRmbuKW0iXXDZKNYfKr^;22kgN#gl$TH;83^X<
z*f1{D&~<8I#Hy8-W0KdsMoqfeaCY1cK~df?0U&>ta**pR!R;R&baRaTKla`-s>-cx
z8>YLv1qA69Sad2V-Jp~xE!{1Rba#kIBOxH&T?-VEkWMM-M)>C1TfFb*$M^hv$NS^m
zV+|eS+Sj_)yyiUnJdV??(u!uU>YAf6-HvG>Xq(7dDMr2xZdBuTH(4(+Y5*!sY_I$i
z4E6QH|MRTlkVj0G-_6Cmn5hB=RI^($<v_<pQ$qymT%*}1UaPUwfqL&4L10cMS;7AF
za=XH-sm+;PQ;uQcI<Ej!!Cb0t%k-*Qqa#^AjIaHk?h^zIxQib6>y2x>BBPT%!8xfy
zk@Zg-2HiOVIe!8te>_;hQF86?*ee5n2&#D_siH_y4W*(~5(3SxvnQ~iEtvZg;oxbc
z+ojBsd5@z=^u}IB3-vyZBzrL<lz%@3hBmH~QFO~-S^bLtq?QpB?&Xa}%@*z_x2!1c
zu`+157q$;Cq=9u8G%Yx<I`u}9&&R1yAABpXm80=^RV4y_NJ4y1l~Iw}ZO$SuY6qBS
zZtB2^S>UOdwt?^_No?(uq~9O{U{a_}!&>tEb#;vQRx5v6%kR@4^Lc{v?s<YVTC@{Y
zI(NV`SruU1Jgqy!?o7I@KLo|Rk$a)SWOMvy{LP%(!d#7)LPzvMIDY0u)1dQ*uUrVR
zEeygWjVsY2GXSF0oMj5|<52n}VdBiq`t|q#0Ia+k#_V=(*65?3aIVoH7Oq`Uv^n2j
zGR#*XW*if<w!(nclq&bJ1U@829BZ+i5i9r%5Yx|pU!Jt!DHZr84N>r0O#uPe6obKa
z+;iXk3L1A%7pw64PGqY!L6m9f_I&MWnlD|z4r_++gT@|`=TpGSZ}^k|?9Y63>$|2O
z0mEtLccq+q5EJ*DtXltxc)`FH@Pa)LCT!WMN%cV~YA8wj8B3up*fP1R(;s#%BBRb}
zZ3so+Y%VLMh35EOw?Edce~Y03o}o=?Y5r^aFUs*}*5xy6`SyyxH%G4v7I#2cbmE%6
zS#J%xBV{kTLL$7iNiVm{`ETpZp@F|QkayEH{E>HGjsG^c0+`m6THwl@1}8)o9QN;!
zPphzL#Tle3cUae95E=|OxmiCEDXM7(F0B-zoI2`w)ij?>0sxCemWA3N{BzuTET`dh
zy3_E@E!*c*Z!fzhzjkM_tYpJkQ={n{z<+D0<vuL*Hxny6*wWnvR!je&utc@we%%dD
zjd$CK*WHd}10Uf2`S0WudH`G@xp|L-4t<C*mv;wn>%6B)EXMvn0d5%(hxhz;^RsN8
zmB6uSc|Qg2HQ@3QF~(+~+g-hl3KYZa^)FcOoGFO2*{_4Y@I2M3{1$u$0#_rGP~peL
zE8)VA^`Z+zhMjk+0rWoCwE|B0!CJa`QNqd}@)ZLNys?BieEjENjn!a?Df~Y#0gC8S
zV9x@c9#H-HB?cHp6@RFo;m`eW{_&uCnG}?!d?)IxKr+<y-Y!SKygolLd^D`3t(RYa
z&#-gS`C9#Ug4$-yLKvfT;nOeqyt`2Pf6hU`3wtLi6i^MER3FjKtGdrNKb{pTXYBxG
zC`R)FC{2U4N5`;8*=6%<4`=JSNh!diDVG4QL(@c=-niEW0BqNSSd_c)tjqfx2I3Ms
zn;q|sqBQ3ntn;}(==)xvL^+;i>Zr}ASX)x!xwZGZ6~JZrf0$+19B!S)D{xl9B5qBO
zYN}me@oIqH{Sd$2@5-OQuM=JM<qxp$e+<(EW>~jCvH{GR4)6oM*K;L<-#WSFd^tkE
zY?#b?p|u4@(l9gzYS1=aQd(~H+@?0k<=5=Am$qMl`GWV6lF0dD;4V~TPk)^XOlJ5K
zbsyfyOAP@_;OHqa916g4Y8s|P06#qlBKR~n9T*(}MEir~&4*9DHnQAaM{s=JZ_RJ8
zf==(kpzEzZSDt=E8(=07@xu+@-<Mj~<%cM%ckmxwz``My!1<G<$tS@Uzw79^o@UNk
zrZu7l4<**o2O5W6E$(xviDX`w<(iI7&ii3mj~adUMJP4QhaHsgtviaU=Eeig!TUVd
z9Hmt54#0?LkT3J#38=1H@cjXEDdBU@U@#j<cXmL^VEC$hcc%6TVF<toC~i$t49sLg
zacV*M^8<se4DT(!Dc-+g$Djmc%#v-;`%Wf{5!8U%^aId!7l=@QA-@1>`pg8(3<cr1
z&7{6l6bLv$Gtg;7vb4-j!20{}50`v#sOXWR7hv*y1V7?GKPe+D?g>EA|D-K|CD7o8
zb@)|*t9C@-$M6+!2z)z<8p>55;+lfNf&U@f1Q9B82Wwlw!u&a}y%qt_V+##1h)vZ$
zH#$ZeSmo)ZuFxL<RK0YPZ^v@|Tzz|@G^GUq5a;6_@k@YN1<l}$hOV5bS2RIjcIlTg
z6wNns3moT}bO|69c=lP_HEurGD=Jv`1%2wNI5`?ikfqcLqKM_q*|f?S?^DqnZyW(g
zxVy;N9Y;qMRs>(pIJ`T@uu93q06rdSr(qrelWw%DlSv~ne(H&`=9Sl%$D8wNy-@-)
z{qycCQSFU0wF1WNaCU;pu4y)v9Vqivt$@|Rxi1Knxb+s<E~{>Kt2t7FkBio3<sKIl
zoPI^fbxbMhUP=0?c(gz%!_!CnY}RRB%oZe{6=w@DU(`f8oMW`Ms0crI(09)c1T7gb
z@c-#*g38S8&6&iwkD)=6>qp07j1{6~mTq-0{VCWCqu)7nwqYZC`*)VBMV9AQaluL{
zDvi7GVqQAN$mxQg9~aX0d`r{W%~aLPmRfZX00lDTcz-(EGtYDgAywZJ7v>Lsenai#
z{{>OJ$RJ{ftF0N9V$R!z-H|eSKm~%WmBr|;w}OT#%K<qGWC>QAc~Y?ds0Ys3FDad~
zSw}vvVD34FcR&p?CW-rt=Ku@7({eUHaC|OthXpt?RO>$(5nPx8AnSYemmhj_uWE>b
zU`XrvE5TX&z&b)jx@?4eu>5w+-fxeisVteY-QRC^`tBlmU2Yccet&Lv!YWw;Bm-n5
zO2P1-0;_@Z@Hm<@{oQz^q|_1Xn#B@U!pi-v#R1@u<4M&|_7)br`f#D(t~F%+MoOqe
zd2y_g$TbnoFHkk`59Fh+)-nQV-bYeROZ}2XA#-RZ7+DwWa^vIZYvB&48Pk5ZR|~WX
z$&-BPzzF`XnTq??EZ(c|$Cg*7!gL<vKRW*{pc>pZhv`c+{PTtv*}SB_VO&{6t4tvf
z1^YF4+{gN3@%{i7?itVRk1h8DZ)`cJ1Hb)C^Qg;XlW$rKz-+#sO_9<GV`)rdW@*l8
zh<qWs{=MIYs|_P#Nfgp5NTs?O77ynCBK`3)MvMd#TJzinFw#!K8P8L$wgWNdiKtl7
z%4UhyZJG~yBaqytL5e$x|M*0&$<>BSz8Pfb&G+-A0;>Tg{7Xh@^dPLNLM-!hy1I~b
z*mMt)A?#8@T2#DARvztyLj@UoMW!vX9Cp9C@)K)L<>hMAzi<8l#v4^C9nt^eJllU<
zXl}(yi6pa<qK>IsPyx%-`mw5)Vt($Wuhs|v4IJrx22mJP#IRxBbDQJv5*9Q8*&Pk|
z3@al5P7GagCs(iq#@mO=CsiK;kcGvI@`^AJ+-4o*_KJ$4`L~$OLE?Hj56V6p$UZaV
zDRVVujXEO>wd=Q8B~e?@hjTpN^A$+<VyeNJ5>EUQBQ{jkEqIwOOi-F-FBQnQ%R7!E
z6l{diaa}-do9^XmpS-(&G%DfM^t+^Fhy0sZzf<FiQj#`5&Rw`k1b7Z*=LOz(`6fL(
zUOt!{Oy-i?>}s=VTV?_dH@o;{3@~dgr0%A5&@7Q3pbqhCPEEVQuqD6z+iZhVi-9RH
za&;i7)1I%sQ5Kk!E659P{3&r2??cTrfl~`8F#~KU?;M%|QHeN7lsKJ*tduHqvp*k$
zTyuUpe@;`lyu_DPa4zlhQV6BzSqV$G21tfLhOHv9Rg||AOlr6<%^n;^kyDa^f(-@T
zVPZDoPE(yn6LA`IyUEm<rlj=yF}YScGw`TXcr&;3MDkYNHwWLxgqQyma6iCeoP5mD
zntzW`z7#mEY7oPD(Iut_aO^HOf?&J_PL3|3GX}ixjp^yMTg=7w$3^*+b!)*?&Lksk
zjj@8K*%*aHL7OyOx4wMH22qrPr>==C*BayakEv}m!#=3WiOj-+K0CLpXyFpx{OFgg
zLH}eRd5Mh+N`1@|DX#O_)WY9>JOQcM%vzF9hu^8Y%Va774>g?@nx2;Zdg!Z55n>8e
zv{aC6oLz?(JmZ?{D3C@XHE{l7(gRP#d6^_gyvm|Gj;}@ivgM$cQfY$g#8yj7%_UQF
zLPhJ9q^<DQ^vCCdd;R6E!^JG3nf%t;J^Cl+GarlgwO0NeQUEY@2Oua5w;TM6GVVlJ
z19O+(y{f~Z7Itq+b&hL|#xL2-t^lwi57N0SE%Kw>W5lA8a5#(5#7m7^jN;@8m}u`g
zdLbM8YCsAQ;H?^wjNFPYw4_|HH0h$lF@9c!QaOgfF!>-#DU4Nuf8WVGmr`Kx0>&?k
z{^ScNkMl>M?o|GB8|eF)v^X(ZQOV{7D6cfXhwB7n!EIt9RWcUT-Sly&aFj|bAi)$B
zou;*D*jnNUyB7P_Xs9U3X`Srd;6G_ADTt0>rP8n-FNiYmSWB8Om*5XJgr%yaA2xrQ
zudRoAU~J%s9gl@CNXO8Kv9gA2L51~LV>OQUjE3?px<7r-XuE^3{15W5WC_wBekL)Y
zZc~|a%J;A7B}Uq<xFC>s`Q~V;^dhN5-l{H@)oJSj5YL91^4Oa;A$hDiR%dAeZ94@A
zzMybB&!f1DqoE;L0H;8Ds$;ul4T??k)K(a&*F&c%50!7T`TQoIm49a)<o3I{W^66U
z3Yftza$q@D-!BS4y5wuS=tz?mre@PhQfOl>xsANEoNKIglgo_GC&^=J-T^fNQ0?u(
zJo^9l?(MhLZY2Banl@y9`Ay(It*5eT>T%rg#DrA|qnxjM5QvfRrJms-CC01bS$=5B
zP%uol%MLUb6LE9fgU;1cj$Zw~_L-eaBfNDobK6K0d~X1ealbFx>P;?ve<+b<<%ldy
z1g+)Rbl(#cVV#gF*4J`D8ue^2g{wleIg>*Aky_Sx23^e&$a0RS%UCj{ERJ3QHccaj
z$0I3!W3uk2VNPf(L1#2kNb+y~`OrQ@V$KW>VSCO9PeA+6CrlOL3uopd=I@%+B%&Rl
zKAkTu5JAL3a9VYKKXZ4X&1g+n0fcj^y@HOb^8RF@`i}G6jqjYhAKyI@imnQYIKpn<
zOn^g=1s};~{CD;w8Enx3m}vg87MAtckEml&v2#n7c!PZDy&vkn$XTKgyrTPe0sa>E
zC70M|v?Evkt_Up*F1Y&5{=ut1D+KEdV?!W<E?YGS>HWQiVLpq<Ac^qzb3^dXew~Yj
zM(*ws9wB(uo^D=8g1`4L;E{mmbOxkl!4~?@M-~Gf!#wU20Tt?>3*fS>IP6;6V=B74
zYm#{2B{At01s>f!T@ryu`Aw%@Bo}w#-(Oq=SIFIv?!E+`|E3MB;X)z2FYfO5x{<)M
zd8#5a|LeA(VDQ3~#%4@^7aAuGp8cT=sZZ)ZzZa(huB}$!%gNte!;=MX6M&YW^!Mqp
zl97a9^H(s3H~(I%2UmKj|LZ{vi(Ft0H}T63e}9ih3Orkp{XWxw-S&bG91MHvH2U59
zNLt~$1FGdKK*&DqAvKZ*S)VMX`ODtOI1rZW0`4^D1u!)l*6uqgcj5+wjy3=$AB#Op
zV}AtZ4H5!VrS&8yi*{M+>R^(>(?n)EHtn)lQ2yvY-TlExM<?a_YnJ6v1PONY{m=h(
z!VGv$uoIn)ca;9Sa90Evkr|5$q6*Cvx)0cUNeN6UPaB;#S3&hG2_6vzkL+&w|D1q3
zgXB^qW#HZVXxGs#D)_EsH*H9c6h_hG?@lRJQWE?Wa!?G@AE5?9f$0x;{rw-FB&?3*
z%EW_0yycJ2AF;@J8J>lVI{fFGaOfWp!1flyWf}av7?ks7ZaZ?`Y{>}m^zZ3OB7Aua
zcD?2!S)an)iN=~B^t+sbEMW*F#a**zECv09PyW?h*jD<YfvwD`U03{bjrJF8`JNd)
zzqM)M{%Qunegs@FUmN!U?VXB?Bk*T8(O;sB;1{y`6Ym*5ow%76`p>?|Qo!W>Gd&gV
zpKC~O;ex6gT`*CK`2`5Kkx01anTVh!K1o6ne}AM9?2iy*@5<dup2C(E9~>V0LLCwj
z8@$N<_q6H5fpmvG!@&vpv+DtJ;I;R4du0@WPQOSJlKVgT(16FrljR17pbIkqq?W8k
zV0z3Cyo!S$1c>`EItR_)v%uqmN!!p<r)-$C;YlIUrnZ3C>T89O#6SvH4i&_nHsCR-
zh)k|MiHJ3MbQ&-asHUfX6$sA;eErdnaEIo9zTShR*uif7z07g*E8_)d1%3eSGd0oD
zern<1>VAU^c`EYYYI%L)=(m4Gvli%$!Y;A#E%-i<)FKeNPcOPqSPYx%i8!7;oEFNG
z@H6BzY-T3seI<W7r3--zn1<zbjPYwD89MEl+O6ImdbL(b1Y^Uu``N$wud|+26q+q4
zZ@i8^`9eqi;q|osts^1JH0?|F*Gy14S+Lq*H1>?YRyfQG7?<~+7Z7&>TK{hkxV=ta
zHvnd*DWHp%=e`1E9z&lKEf3&?4JvGA>(#(C0_G?Re)S4awo@oj&SK3Fu))|QGW6VJ
z^f($3Y&ksu^wiQScad_?ZaV}VfBCD^T||lvP{_!Y?F2pC#+fHM;w}31_EKN-6`0%?
zu5nSPjk*?!Pdu$ocb%c@y5<#W%ngnPBtviZ(hE5wEFKc<%L03QhWsBvW?%%`-4p=Z
zLUnoL!_|SWpvULpy3p!_Oh5kp<8!U|S0~$(L!ha`VB~v|?|BQvASv#?+#smNd|Ox|
zfG)0`fGW_?{TNYtypkQ*aE>SA1xiVRWYM6`9P?@*{*i~5)Hg~I*D@*zUw!RzeU^z5
zEd@Fjd^;L*Vph#qFux(c-XQyD2Am9kgG?qKa7jTVG5^_9QkWYv%@<Mud-{$5;&WL;
z#H=b=+hD&WU98{msr2`KxoG*JWDe7Tc*ai_U+9Z<Dhx|7&4Hzj2Mr7_(|5tuU7@%N
zzh)Z@`jm3S4M4w5*kdJ1a242NtW-Lrhczb?Iz}uie36?VmoSa4ely@{3$!h)z1N}|
zsIc=3A&xt3^M3<W78C8B$^+nEbr2-3_XB6aNRUHNEf@mHM>bLynB21pPAU=9?^VNz
z*&e{!%R3+<07EY5VFdeT&@E!sJOZuW<c>hpAy5hY04(rVFv27W5i4Y62_BSs*as3>
z5@tb#gY`8*?z>9BPkss{WUc!xAulL*q=~qdC$ee{+OR*vQz*7a!EKNZ#(1(f-!efJ
zU|R5|w<NveeR>TwhI#Hw$_Ne(NJNj;TIOHNf&h-2T{S>72be}qbXq@h*=lMLSLPSP
z32XgvG9aMM6)mH62W=w7U<c4K)-rToLMHA-uiy`}#*0V6reK(Ba!a9;31Q~JGJGu;
zL!;I3b&TY&Ac@o983~tRS#zz`_c9jXnp6!yR=@jn>g)RqnivwOedgQ>Rzu}rGDj)S
z$*M!h^-GWkmwVdH)L1(HKAA-I`N*>uBYr9Gkc*rHOy443)EWs(X3yc4qwQb6#R6M;
z{;E3-h?n#UC2cFw2_K-9yabh;@Ao6X()z(hCDyOA^pOWu^ILzH&wrZq?E(}*)El~E
zkG4T2Bmq<*l|j#+OUJ+^$<q&j3BHyah@^z&2lE*%XOVO_b0n9y7sbdNAgoHu^r(c{
z^xK7qzPi0WiXqsx`0@zIg!4xotC-|YJ*x`1E2h8EAS|SychG`701_I;^yX=L|Hv8}
z(kJ(W<w2R(z{k=A!yp8Ppg^{s&ekc=c5`Wc@~khK+7uL%54#Cf>gPjAXF!YnK8*DL
z^b~ig3}_9n!?J8ea}Dh&Xrg)^LjvWfhS}-gEW>f5nXV0`q=0l+;d`0heP1O=`v;gY
z*7@I!u@vJQyk1y8c6}s+38efk@t*}>PrVbZbv9M<E3py>4O3PfuyNMq4Ua)VdiO!*
zrt-5o+mGS6k`Nh2l~ws8Gyiokv*@7!i7R;B&S=pRkq`DH6GMF+f8Bi$EC|K(H?0Oo
zR;6SNvZ_JKQV^5y0+#eZEM1^=XFoQuD1zkT-u3K8t@>+1POO<y_JLuaz0Pb|^y@3F
zC@<O<Z9=@wED}2PUhh>I4@Y<EaWnU-Py!QR{!vKdP7mR4fuTRjA&y~y9fBYNY|KkD
z=nSd1`TBsqy1C|iA2b8BCD~L)$HW#;`re`<LTqot=hdq)6!QS(Y)!2ZP+o1Ac_;=2
z#Z4~DsmfgEpJP&45%ln|RA5jf#OZKjbWa{~b$v8C*DW^3QsPKokyf}2r~m6G6A;78
zN$yNO5H%{#>_q(aKF?tOJUM9@0vtSjd^_+yTCF?`UnY~qyj-=GJ^^hhpNw5?b6Dx!
zvU4x>0e%2fwcF~^&GBX~&-V5Py+S^#nQA;wV%5^bc}PvM71NhSa@O22zML)ot3U33
zJey8=<tG|Gvq;uRNlaM0-C<5K#Gwx0$XnSWZpv%@$9<~s<Vi_+6Uy15#Tc9Iu`-0n
z7RKlEqF$$EKWg@WTYB2BC}l>3$`#~BNv6G15-VSwGQkEmKMzMnEelvlFR`9+s(9<1
zy(U74^Uv4?`AVbq%+q`ReCE)it(}=lx2n)xGsoUq%ZgUzf;c7GhEe)5A>Z-BCkeeP
zvisDDsi{TtAP3W!IwD^=y4TZhOwMO^TrYRu5*hgQuU7W#Kr+;xz}>t2@!2K;knUM4
zstFjMx@V&Lb}wRL%$wX$*g{l22nEr#O0}bk2b1xJ7nM+5PcxYOF7_b{FfkAPSIj@Q
zM?cb9Gi>p^m_xG-+6{<bU@AQHXHKkk+EC`lFqlQTZ3PFr21@o$l+^XKTK<+r#!*kd
z7j%qMW>5R;_Jk9gf+nJ`U~Z|!OU~2;E;fyV$f1|B18%Jgfp04j53U|ZWZ)Uq^ryzZ
z`1PYs1%sG%^{iMvjR!z!+G1k9A>voPW6L6CR&8hGE_~H|I!e0Z^nsKId)aQj*~5Sf
z%(t>W1ZP|LK_q*{A5L}vG+ayF9M^|grazft3Ks(NiY9Iy&nIf%2Kwx)V6ciSVIhF5
zz@JFcggoR1ZbimX3HiiFV>k<RT^`!aix%yoHt~Q1e=vCZuPOH?cw>CpOKDd`$>L7+
zk5l=UMB)uJGhp;#@^he>trPk7>w1tQ=c5p5QRe=a&pu$9dzS470q%b-5t}x<cLp1=
zr}PYldwCZE@uy`qNFZ~m7*7Y@#1t4E7QdYI!9Ll<SxQ8W^g=qYUObk7acaH7zTm0_
zXqoA3y+ip6*?4;ar*%awGLHDDPCbSSoW94fkeo*rE`Zz@vdEH&gWI1aQa2i#LYD4Z
zq8Qw%S7ID9qs)>SY`2+-VZJ}eZ&%hO__f{3aoGVyg%XXa09GjZWZZ#JH>oZIky*MI
zB<gVp>8gKmb+)I1PzDs1*YfL!cB#kqnC@_=%Mn~9_#w|B5y-8sORs@<FF&SX(R0Nx
zXUQd*bt-Q;edbSw)OGTuP$g&4)wtG-6iWMT+y1$1eX8Xv=Z*fP$m?Ei40C95P-hyS
zp=kO)N=%E4wDtXCskF&(CMBAPbdPG#yRXtR#=mBa%JBRnOAfqE$De$JcHk1Djr31j
zS%FZ6M11R%@&~~7$hUf5NDITwW`fSLf$PpxuA?+D$1_a~684|fDnVh()s%FvlGt>V
zwVP$O5Qr+DWC(m@3+jJsAJVaCR;gL6Zl&MfkpR3F!^`6bnNqXTGeOvt0%+Z3Pm@@S
zo2{~ku_j<e$~Dj=Jd9`q6%VO*pdR%+3)7Gp;Tu5i2;z@u6zu{e*ed{E3qX~ZKU!o#
zO{#-m5*^0f@Z>GyCrf10;T%Y(OW6c7OT=0?Na1i%FW53Ks&jT|%!^c`xvT9d9KX<q
zW|PSJoM0zUIpr&)D4%-r{^+3isNe9F(ImCAUT|r6!5Tu;NnXz3uCiQ>NRznvHUG%^
zNo96@w1W~;U7w7HlBDd{t9fx8?+5gI4BxpMcrN)Zw`u)?t0mW8lR|f{P-bDn;b1i*
z>#6<uM?Ac#ZJp%xn>9S=G~6=YEeCJxLq};>IlJ866pp?~;1`2l&*_Oq|9dzE=<bIC
zT_bq5B$ubVl;ch+BLgQCv%QT%mlaboRWm7%9#WvY(*xt|eMQgMNp2gSE?~1o!O)yB
ze68n>+t;p$i#odxO6sb?-9N<eVz1VB>lR$CNjL5<5F9SiwxwF4-Md=PutR#EU|F9V
z+K0cMfA!`koZ2Snm?{eNksltvo^RIUzW{L_>k_&d$#<TZDopdA_k_~%2uuYdFTi<d
z2=){NOGCzY+d#BlLhxB&)>*nckWTQ2&)HmI$t&wXGJn>YMe_%ZdthE(B#qcJ>JWZe
z=m%KfQx!l;+5B;s9{s$WTHtl+_3^k$xr^laHfWZ<0!G6T%++E&kfFpN7Q6hJ$5{op
z&^N;eQU+lVWtdD>>FQK-&Bm^XP|=uyF2BQ%=di#U#2byR3t2mbi5TX_!B%TCwDq+%
zKeT_pJvl#Li80uts~G=SH_r;|`zU4zD78cfDB>j_+zOvjBfSSz0A{Ew;s!wa3@Xv~
z9N@ih;7ENBSHHP!0>bRA#`f%PYgju*F%$BIV}Yien!!a5M{;$<#S<HqHFIgEJ-rij
zkBWOkuXWaIvEh5!15AHWud=-UUY2Ge2-2bnoKyZ&67f*lvJ8@8YXn@xN~Rb4t(`6b
zCpo;P;hxy9t2XfA;j5@T=4uL==essy$ayV(;iD;=O`U^jYHI+5<fMc=$#hD%Zn3V?
zJ$vx(9Q14OgA_V5hlK{#ug-#3gZO;1FZQKLk-NMG(9O?xq~ISmF>#y<omQ;Bj>gR)
z*S!UiNdq?JB%XNQ0j)uX;1X8m@b(D+CaUIm726#D%_AA`<dF9BXe5{qx@m%OX|w5y
z?LZfKB~1FfU{G~rzbZO78>E4D#2WOObRM5P@&YKI^Su?emABWx>55tg3R`l6{er#W
zTDX1yq$b(IH1xRQ31Yz)i$1eRz(-Yl1#nRp(dIAq8XxQwebkP%so(?`!Nfkb!sUX`
zMs2=Z3-`SD{n=0-$lgPf<ko<6jj&>1TYMSQxFzG0O)4#K96WJl4(4;3{izx%w^T5j
zaz@l$0vgaw--$bMZ@xPT*gdYbUyljJ`Yu=Zas%-I&IA|B<lZR`)-tK+j~cp*Ha!>0
z_we2FVdmL8AU9H{PaOZ9sJ?_X&p}o#fc6m&oo*%#{af4{sx}ckSL^VR*3*wI7RVT0
zi~M#Wzp)&Yz2efWgIcW1SHKArcPpzDBwUKLgdh?_6>~>PN%fELl6SoNCWW#@+>*g$
z#0DJnQ+{|ZMFSt2ti_qLV?C55A|EeacLXp@QmMuCR~#@;Ltt#=mN4>>U`xZ|Qt(0p
z43RrkeyZ#uY;Gr-KnPuYB<Q-MTQ^F@=a<d3G^}C!<O(8wd+k(5FNiloYkc(yAim^E
zG>hNk957@?k?}|qvyO$-cArHFj=sprLcUPKFqc9ZT>saR#xuP`e(1fq()ad4Q{<xB
zBj~RD=nsO~{awVXC=T0Y(O`z<b(%hUc2>qtJ!$EH=a750R<_LtkM-*9a}QCzgi!tl
z*<7i_?gZyjRWPNchP%IcFPI|ttzJ~CrF#UYxo2lx2@<%0V=GPP4B)vk$y`R|%8&S!
zYH)E;-7PKE`iP*5{3oznps`blyz|5U8#9m`*jD5jP@9-llvsLtPWMH*jy)LGDS9w3
z$NUP!)^nmwu?27*TTheOH(d#F@!@knKHKlUz*7ff_q--jSTd*kR+S)K*K|c&*H70y
z7DIdrTHfquv&0G!LurpY#?f{ydC<J_0Nfzptg<5O7n4d4y!S`!?=oJU(`N`s!vKBq
zLx~pNj^l#|0kj>FCUORmAaYEiQ;??)B(nG4awF~u?--5qq0$84dA(@uRtjT*jmJ7F
z>S&d;xZfPOm}MHWdg+9X`W({L{QUFl;?&d&l#y3Q>+@f{a5k1^_|F9A1DU92x@rk_
ztUEa~b0xfK=$yyIahfMfwAhGSZ9PpohcI%qxC=YUXZ%xWL_Zz-w#%S+N1GVni-YRM
zskPCq4JovJ<`{#2<3oiDn$9KPScZLwHA)WgOyw=zdDoGeMDy4l7+tF5@klzedv@3w
z4%f4?Ze()*_YnxZ85%hEJ%Sz53@6UnGX(9jCu5!M^rcTgaY;4Kv<a4W8sqPuCgK^{
zx<5SuF&fH8S^jKYFiOss>eXN(3+uUzB<L=leIpe1KKL4-0MbKyk6uq}kQ~)@Du*Ms
zN5Ip`vEV~@ma;BGi1~u;3%XtaG;It!n$=7KR$r>%t?M4fTyk3TJwe@ip&o1^+Yk2D
zrWm%ZL43p2ez!MP`1UeUl#=d{Tx##ST%{)akUfzgGs_r)*`i#D-_&S!tU=*cfrXg1
zhA&)~h9|eI<1qTBg89&7g|P@5-s}V<kreD0T9}=$1JL)o&ia}cH>pj&yy+8nSO5bc
z8}{tB&G^2-9_jC~g0Q@Er0siV!~qt*Tmawm3Y15R+JHCNbVZDub++g$wlC<BwG)D>
zf6B?!o)GmuuiWmX5IjZ0KW3QO62q;M%m7cs7PH-X;Yx>={dgbJX(zaAfDVt-30Dqs
zX*{`_N~9_j!Qb5AO<ZYNP4?bnNB(#o%~v?deTi9MAF@#DUXPBCwIWNFclzu2L8A%Q
zRN)k9s)*<{ZK9=&E^p^s83(1sqdt%er(RCIjA0qo)xT$U-f$x-`AZD#RJfVIBpv6<
zg*9_?^?KehGyfHIUOWS=RQBf4(a~Iny_xT?MLy~rt^T*@U0tD|i^zDP?|(*qS}Rb-
z^_|?%ZhuTRtW6;6=u0!Fz4&S54SQc}*M?ji{i9|jiJb}!2&RamD=0+csURK4JXw7-
zrsBW<ja;H&e`Ut|kbs#~G&jVnzI6bjMlAc@Z&(@gmLkLn1qg$jh<@iZsLv&B#8{qF
zdUz}n2WJ?Mk7F;z3x5%V`Wt^(=u@ipIEu(%m`zN-2w!}GSfxbSjPLy3uX$YoSbH)g
zUNTvsSqL3IKNkHxBx10Av8=QER)HIYL4-DVeD+<1Vl<1>A(T40l|j|3u2DEq2s+f!
z7**ZJvy!Q~I4QDTXdYlRX9Ca-N?q<T!SzNXFf<pdNfw8Z4?AE0<)UP9x&XVF5D$a+
z_0sUBZvbRX6EaSpmyOw}P#bGmj(sY6sFK#KsJ05QP-^e<1Q={a)A$Ov?RJBasucE^
z!Yc6?YUzft=+bALA+^)yO|H9|Q0{SWtqEsO?7TT+4D-`wLo|4~a@JHm<jGl)sZzX$
zBM1m!Ca%2)qg4k7(PYrv{V^=;?k=m<#Bnomi{6vM9_la!yfotu5~w<_xm^yAaxh=b
z5x@&HIo^5@LWfli%C-T7e^mf!*4m;%cbD#U_`yCAqMHpFll0@-T%#R)qwG{+#pxfj
z8Y?cKu^^S@0~!n6*zg>gbbTlUfmVD@oj>oD`AdUBG;_z4NKU8T&PPjTbJ->KMCZ95
zpBJW<Lq>-0<;V0!AvZHDN+lA+@QLpBDZMMiW1K@J&|5Jh>qI9oZ3E42Uw=s&iZImz
zIk?tPMJW{QG{F}@gM&=0IuHfd_octn)AYtriZ|H{*v>-y7js7px#24^Xr$M>ST!Jl
z#;EvX882|2Ays;1dWfLrDl==pQGJxUnU+Fjj22wq@K8*G_L_DwW^On`1x+FyRT^_B
zg=<fE_SnA5Vde-2HRvhCsWFH4=X5ZiJDulHMo@!S3Hv3$pEj|gTEPi_7+y6Gr=M~n
ztFSDX3T#1?;(IP|TdHBbzD_|$0H4iYa6tggH0-jNyt()UihT^oVyXj^WfWG>g=Vvn
zk}3<z3sitoap3p1>m(Q5qb17-5sgS~5*Q|5ZXv(i+-}@IORvN<H{+Do=nu9>bkL-v
zL2BPdCwaQW&X|KZUk|L&Dm9Xqjqc0+o{gf@Ro=<o1bef#8@z((SdZ}?LnmE@al$X0
z(q5EpFgM=TdMXE*&$B1N8QcNgCR$r`cS0x}l1i%@BT57D{wr#UEqe`rJUH*Pl>u?z
z;q6yrXie=JmJEWu8hTVQT$0z4oiU_9J49*hW4>$Uj^SoMUJ1Q-__3Xs+Q|1N@p;S-
z<jBLfpdfmPjCHgR@=#oN=5R#KkIyEO3j5e`;9{sW1Ncfirp-&7h&!5)-UmvcEFsUM
z&9Gr@q~^w+Q_{T=o+a?RB7ow3u80YiM^fS6g-1Am6PyF)H#_nY*5P7Op{ZNSSWX}h
zATC>mV>WwN$|=1#oFQ0&kv9zbA3q?VI=ZR-WSsQnwSeTjfYVNq<^~I$Pkj(ySp5Fy
zuOS)7tJ3_y74PIElAv;X%*YrW&>$gs(a~!_s+olQgbMooEBRL8YxcN|w!<m~+*ebr
zUuj28Y+MpR?IRPc$@;bz0vml+%Xmx4mrp=_rBN$=!V70Ft&_a@C!Ti6$i1cEVV{E0
z>WCuuH&o%)^qHOH-PEgf<7SV&-7zN=2QpiBt5(Aie;)0;XZ*!OOXW!b-G65kYP#$l
z^!c;h!n_UI6kC1hmyl8O9WDbFG9jE=w~rOuT`Be<UZn0ZkEPY%#85HJTeTDRV69Zi
zH%QIs=I$ogI%Yxel4Ai&3aAw;eY^x$%P5E{(+AyX674R;Oy1K=1E5QnJKI>UZ;D}l
zE)_JWW`U|wbnuc?C2&y~cMpNV{ZTpLHM1-Y!n4<)V@;B=O;Mt!2Einr{;*ph3kgIG
zB@eOijGMA+R$5X#i>_o?^$pmdkmf#7iuS#?fQc6OP3Rq-au_OiE0$omSrJofo4rOp
zw$I{YwG6H$EL=zXK$KBlGTY4ktgukI{<v2m0h`Nkr`!U=qh*=sOE?$pV>rW9MwLN)
zO>)tKhW%#+rRN@m9XlK8ooOxabHoKwfp)Aq>n3k11-0%31W9y~cami7FdY*X*`xSl
zwb|^1?6-sY&kezi;2U84&&D!*o?ar{#WLQOdHB<8S#Fi>J)5ZGDv+lrm%qrWP~EWV
zYRDFq+7-ead5J&_-67WFL2*ynoOKdz<|3Mu<qqQGlc4q$**2jtTf!Me$GxxWqn;tr
zsrPh!XP2+XXbH<pXQNv7IVvuVCPEw25>~(i8aNA?Uyq22xkof~A0WvZMD`p{b$RUj
z2D?=X1c}Mu@l{}Y&%cw$rg>*^w2HhH^$22E_+N@>v4kZ-@BGOmOy^RrnNuyMkpG<b
zkuTZrm#6_G*)w!1=(Yqk_93i7m*^6X?`Zolvj<_{P*kO%nn#s)j^IZ5E%U7`Gla^z
z-v3aLoynGzC!9%ni9MW@i#P=Q?L7vPr|Cz`M~_Jb_s?;@6a@1fg$(%eS&upe2Q8d)
zel>(zU|v96-rEFSE_$K8&3XY~m|F8B`{iF3Eb)GDD{~Y`1^JWZaR@C9``!;f!h0Nk
z;STiF#3BUR!G_l0>^#peLaNy)<4cpul1aDojp$l*7s<WLbU2k~x+HzxHBQKPmVv(5
z`{w#$V#zpU0dt1i+<P=s0WvOled_a;(kUws$0SGZ?F0xT!f{UbP5e$13?P9_WWL+y
zy!+iskaabH?$e2PyAr~K8>Yu9ut$l*qA|hCHf~&uw?%y2-Y$yTHc5M&ex($Fig(XG
zioScY*xu|DNS9^abt39jFA~UFpoPyQptZ@+p`|Sg$v)z61c|gic@V*MNbq!5$kIoQ
zy-rnqLf{ec9$<}XlZneX%)O6A6!)V2nt|1VZoVEEnK(TCW4;d49EKZzUi6@x(Yrh&
z7i#v%1Vn9$c0&5k-R1LS-XCxy2S2OXyuv^WE70MCEQuqTEp%DnP6l}xEDCqFV4l9y
z!~Rwluze&$i)9Cj%ZRUBL4y{r4GEm^J?lmYT=wP45Vnz-*?W`OO39y{UhapuFE9mJ
zi&z6iDeGTI84`HoTXm5V`wn;P&iqP`!6OwV{#ww6cF+@3Nw8|rfy*?DHfE9_Srk+)
zN9o5@8&MPgo9T1EQ(L{nv|qtAbFt>4RfC(e+NSCnOGEJt-{Iyp%_rwcP2>ibe7O&=
z=NseizZ4g;cgz@`=GFiH>-PQt0@Bye>8XfGW$oW-u7|G!UaRObN>tp)e&1=+KPj{7
zFgtXvj^nlY4R_%%T}ror*T6@b9N$n{+K?meM?Zao;xf<p9EU4G(7fi!=~N=kKV7EG
zoD_m3tOe9dFWIxO@HjM;Z-TCuE<SlEcFdx&Am8&_E$ukNmSs2+b*F2`gnM%7;vy7=
zeoo^*$rn9Ykteo0hs=XDFFdoTgx!HEC~78c(VUZMk1~qm&6QTbMhy(BeP+ugojTc~
z2u}87!5E?%t{{NfjXrUJJLYr#sk*jgGwQHj(U&G>Db^aX)Dhs)198Ej=a#D{Lh7)`
zG(X)6H4fpCq#836^t4fQ=rfdR29?LbF0|}p<+FtGWjL~!)`rK+ZtlUBf#!@zYSmwh
z_4LLJwFq}yq9J_Wuqy70p>!iQy+hgaWw;LA|3Fxf=f}sCrLlx{MLJux0E#!8KPf_(
z@Q05Rzlw5ehl%5TIri=HI(_x40}l!Y=C`j9=1`mYHL5;hE`Mtbf&d<*Vf0RJjIwuQ
zK^GojXTb2+e^--5*bb*>GdsZW6GxpD$BU9sSEp(puG&z7RQutqIKeHYxLX8_+B<R7
zZtxvmytro>s2edV*lerK8*E;MbM&wRQ$KI2IxJpFzD2;me}Xd5z7?2@%$jLZ;9X|%
z_eo0La#8>8Mv)+JAPnSE4>Da2eGFQqTAMon<PRDv%rQ10P}&$&X-=gjES2+EEMj7t
z{on#Z!Mt>)?4Rvo_ikgn8P5s&aEI(Ek=j?YX+ymftEP;YgQ^ubKMiaMevGTJ{L1#q
z$J{0M1eK(5zr}t8sW`SRSM*_dW=;8ZSWW45{qgOQ?}c@r>=%>NflE-=`)X$>sMl)V
zxV<KMSy{1CI{CW*sYIsG^~rL>%XlFl*MW@uRN<Y@9w*-YioRDJrw;0Q_Lv^;2kb9w
zx&(Q>i@lJGRd=E|`Hhk(*~Hn6Zai|>XB>ncj-+7!`lR!j#*ni@>2?p*)Nupilhc`|
zVZtII4yv7=)n32dDg8mG^*$;t;t_glq0{h~)Cc%>q1IvVpMe=Hv+@rnQD+pCA=Ns(
z-9o};{U9CK(+EOgoNOamJZ|pf$lSchpwDCKEhtn5@rZ%J*uXR3t_<wEnw=@o3$Ix6
zL9FnYkEf9n>BZT%hcW`YooRJqG0mUQk|xYT@;!u6r76{IelB<bFJ{q&W%|uVI@LcV
zb=;b4V{fiW2OaM;_VtxJUAXb@;jOw30;p(J6(*hQlGvCv{S<hz*uG#7z1>we5p*Yj
z68eR{X<W!_d$ia-<oBEKasby*Rq`Z+?q}=$9)Es&nh1x7sy5t}u{0AnS$tY^!jQS7
zU;PgArq>6v`HT7w=FI$4j7mSyjZJ;9SPFU53NMXA5@fn{&!o{a>XepG47$^q#uUT1
zy4IecRxiK(X~RC@9>o6L;9h{YIo{g-H`KRYF7*}=5`+NqZt+N5X&R0iBx$<Lj+bRl
zQKSZlIdyAt<TXDXgE>+KmtK2yN_;)*y>LAE>Op>Z#^<=agKufH_Jk#=DdaVzUUfcu
zx?|$;)2lZSb3u;GmTPOw>A1>gE!cK-eHmB@Up=gzNG}_DEN?t3pNz>8BAa=1Q3o=X
z7adepS?yhD3-B|djJ0PE-B+chPo6XhVzEloOQb<_P1i#@HaKiAU4B(EOokDYgpHc|
z@CBb8wCsb-nDKx&fc6bNpP4Rmw2HjEDqZZU_+l2K9bqK4*#qOgD7<C?W8hR@h?^b0
z=4<{o+-5qO8%U`g=1~xumd!Nt#+>?)HO-7fW8XaQ=a{b7;cJFSx%&7vkYT#VZtrV5
zWy*Ks;Hzh(9bqS7jfg(FBm?Or(cEH#;pf+SI?v6NMZNgHf{s^{=mBm0B7COaE*=(-
zN?95SUQH;PaQm5%Z8|D8#p;#{ra3TGEi-u%`f3pL_i*cFBW3g@8U{bloX;y*GM6aY
z1D^!prLVI{kgZ3wynY}*xv}U*Dta>_jWPede+PA$m@M5;@m!EKuV9tp`{WuR?{l5+
z{(N%Y(WMb4bV{#ACPGu*xq~dhZO1|HSz7Y8ww)itPNLdqKR}Sad0r87o&UwC11g>N
z=xF3{<;H&31~rm;Y6~lKv$Yy+N3&T6&J_lIi~xt^irvnmpbzQ9QA&H7Oirh+*v3Yw
z0fmJtix<I_c0qY7PCp`fdG2*j9n~#~!_$+kTL^mNE<O`vJ6a~29fcXn4cEZfsAD)Y
zV*R7WF~cwjF78dKR_VGGLXL!R^LWl^penV@61(QBX-*g}M;FN(-5hN}mOxQcK{}iF
z`b{7v|7`9-5Q(7N`BDIvj;PCHUM0UEzTe#n5?Ot?$YGAmL~odU&`jH7=&$8v$)a%3
z(boc`xMeu`uYb13FxyMTAw07{50Zv>ZHF|dGst<QRXJ4ZKmTPb<npWNn8N4_KR!tC
zn)`p1-^O8#P&NO2;^Li;pflh!(l;<rEw>hzb7Or!-{+hyGuJM1BhOVK36-L`^)zRS
z)$N<F!p*FJvY8D9{Y7<hBF)KsisQwWg78q2ZO+Nfhl$^-F*bSm&vLEVzwDt;dpOr7
z_G`@%ELsgNX?U?eSQz}~l9KaTI<qPN5>vgw_~k%CLl`l|WlWjKerTlrP2;TU0iEUZ
z3m#bG%4Ob`P4t-UV&bww?QOKS#rAkSn}*a7nAcu5E27X_@k#tX!5Nw;bpIA@5~+@j
zA*!qnsSn;y{HIP)p@c{jjromgc;w5)e5@C6CZx8R&Nxa&d0l7dodeM{VkNfO_A<yZ
zWM1cm(<tKA4qXA>;``wtc!8Np^H}rHpNP3xc<qWjoK&ma%8{hf7|j&+T}3VBU0>ST
zd?ew5>(F|`YbC=7MI*em(vIqOjj2OJ?aA(aC-B&Drr|Q$UAdyZ!a8cl{^XGmd6fC9
z(0HWhxd@06mq-M!&#{A1lcyx6j#^JeqI(Rxx}IG3cKSN0yNru-qT%9ke&IZQwA2|q
zcd0VKA1OY@8AOG)+~0zthR`s3t*7BP*?x>RF=36LTn`#S6B0qzM#KbpOX+XW>U^$T
zlO&)-k9~LoNIAY^U!j>~4bW1oNA6MV*m;u}<4mobL!bO=d{amhWHd~^G;=zN;CH6Q
zKrOe#9<Lqv5@3zPgb;-HP;DRV=M6p-P4W-Rm(kg;G2G~DAEyd3rq?8tc<3PB{U9hE
zq>kS_1dj>1M5TJ8FrhsuG|)$3VHd}vn+cAEm`EL1rj<)d{<p^Ghq5Nmp8Z5edJd5(
z=U&AM)cO9Buw;N~S4SSIh`hQT9HSdBPR37X*E!t~VV-Zf`6FrZ^B_Txag>5fk{9K%
zM!;Bt#H4lw^4CzpB`lx0=eLBG`j^^im6tNaC>OdbxmMm?$<4I=6WaO48pI*t8SS4u
zw&l~=z7Ak&lqU}&Qz+Cm?v)(S2-j-hWKNa$W{KN8zVID5;&ocV^Z0zByIo&8G5eWD
zkIwYyTO3u|;m|674r^Sw&jgcJ|4$qR)hn0ZlgZ~U>YfJts?g_4d6YOejKQB$@K>eA
zm(uW{#%MNkF0ZprS{aQ$3@kp7MQO|fc-W`08vr09O9-#hsrN8SBEEO)2_)jffde}O
z9y5S&_!F#Nz)H;kmyD(+DNed-*r6dMy6G=RmTil?^5mgxEof7*aKRnp4>C#0V?-7z
zxm$h2{=JM|QeHzS>->5U(Z4O`hW_I{4zh;=&nyDWCn_0zR?CI=<Rcu#*y&z>_J(X|
zPsCJEnXgNhvn^-{Yx@~`_$nD*rA6vFG`!d6zLNCHmgYZ!%;*`uq%IX>auT?uRd7>r
zB81kco^07o4t&1Pwzj2i@Zr%kO>+OKKchRbPUp0L!jH8zOOC`ooh|~wfxsw~{_GXX
z{L#eTL$lK9r$}8F++iX6^)ljo?)B3${Gf|fKs2iLD1_4&dgy+ooOR@Iwe~dC+g;yd
zbHO2xc->QKKFy_?;CpNxwo})X^F4pghoEJQe&`K+xCN}G>t6Z@{J}o0{t8wANZamI
z`=`M0jc|X}28Glm2!*Z>nd2DA6WjLwhzP0*<mzx8o3+MaJh$g5Eo*XKeB!50qPV)9
zw{L3`nVva<1f9z6<3%{ph~5*%joXQ*y%K{MM&C#x#T7rb230FhT*+q)#ti->wj2EQ
za(gbQI^~&i9IuDf5aYI=SF}w{Cq1cdl2l=TxR~}@;itJIdjX#=+KU&PxB7EFE<+?k
zY#bkF1q*@>No3?7baP*e_<biRc^4qKw3DIyW1^wxaS8T&diQNrB{Y{&W!pv1dr1OI
z2d6u$AyU(d0KgFt7X<^<qr(6`&zZp=F9(w86F@hrU!OEO9M%gr34ueZ0W3|m;7s~k
zMF0cSLM{x!4s@!%$M;T}JI*-7$|{B>?A88<uOZh)XfJ~L$VB^qPhn=Yf0;k@){Jy7
z^EJAeovz{YANn@PdAK5vjXKH0iD_34cGGqNWQk9c@I7(!Chf75<WMSSgw^HTG<ClH
z>ETm|OxT7>>AahZP~|vI&w7W0>1s7jDdY2A(Wl1&I%Lx9)}$>Rr2vvxS4~Q==3kXR
z7sPiBA0wcm#gFPTUtAIl(#ejVt_J`gWs)rk_;mW4T&iKYpf<uIv7xGjmxOz856h!+
zc$q18D`7WY5&~loFxi~`!ys@_l*F`<4~y)sktb8Y<nRY|WuIq?v4<BFgJGr>MY5+e
z<rVvHX0^BZr(K*sHbXu(=zu1s1v8TU`Zdd@U`0mRv;^0{D&lU}ljv%Z?PeQd!TLU}
z)OXr(+XkJ}H7<yntvstHl+}ke{Lu2`6e`jFWL-mZG^JpBvyh_iY~QuB5aY0N%g(;t
z>Uso<i4|uHx0zmeX&t}=9P96y3QeK0OX$|S!;%iXk~DrF&IZ5NPXGq9MRQ8{KVVyT
z7}z%BUno6o84~Tg+;rxfdksG<a_{4@FJTcsSg@&<gc?EEVw8)vw0rQg^>P+!9tYFW
zPksjzU=lGdC<kR$2A^yX#>h7f#wgrM?A5WVeE$?w7^<baaX2?V_W)bX<}iH};TNdM
zeeNic^jIFox6UNj)Ee{Fi+Dc&!h9`HsE!SPlnAOywYcIs#LiVSo2+^o@%l5X9(^de
z=qN2!c9Z>#78RT{nPbcZt7Fz$QnFUC&rS87Taw>S_w;>UpWl)LNe%BFviM|})Exb!
zxLA(K{#x`(4wkQ%eqGPubu~3Aw#-^6P`B$jwHu<Wus?MyS%hQt#xb}S4Xz$Dc<?&y
z4l?%!$s6c%FShg_Fh9>zTHVgHQ`Rf54F<-`sojBsu6)m25?<d4FLfTj`=B(y%BG{k
z!-zGfxl4ZG?2Buuy{=rW2P)iHo#chj3=Q}7F+arQHx)b}r!09R{f3u=&Y>d7*e|X`
zzL?Ehn`25b+g3om;Nv}72~IC9t?M2ABgTO^o}6vXRaY;k$CNtJe9it;7CO4OA}_Ui
z^y_~!kzb!_Y`JS<wtSM6V@#$~bejHUq$Lhn70LMYOJRHJAQ^RDmn~EeCk5i_*EPx^
zwM#NFucwe1`0i}iD5iAQ0S$4^1l`?k<n;yxu44(cXoJR$VC@rCS8k_FWaRsle;7T$
z*YW`DmdR@=A?iOE$1$V;#>^hRFBHMgnYn5MwM}lQ(MyRj45;89VXxa!n-aqnl{63U
zpxB9qx#G<(K0%SGdV|RHx#=cEFehyTdt;GI6N*&_s-%Jrr+?Wtc-XMcR2Qp5B+Tc4
zPb>oXS#`LP7!5_BEHA4}z#>c;g>dhHmj1>td#jlK`VQqt55stlu|GcIb$fE=xmOyi
z9OT*?t_F!nN?H~Di!u&)0=l8xmY?o`(_ysWXQb=MPVt7uPKa8QLGe9?-;4e)f|dsS
z@Au{(A?kmA*B+qM)9&j#_;CkU*DoL-{kvH3=OXCJ^Vwqlxh+f$d=CJ$Wgc>1nA0H)
znf+l_yg_=O4whMDD2|i;N7R~OjGnorAn<jj8h7?Pm6U9Tfw;KXMm+u{o^(?KtLsp=
z3a^yfsV8~v&inVqNZ3Z?VGdc|k%WTze!(+=?G(!GdHMHumtnLdcRfpJ!oQfMUoc2$
z6C<ugClM5(voGQAjV6iUMlu6k<-3gq*hbv{9l{IF%K=6MndR`1BrLEe!4uC`yd%Nj
z<$;y-{Oojm_UB-Mo$+dixe#TG1Kq!;=?>#38EU5={ezn&2onMOw`@Crm=5;&zu9B{
zWH7AtW76dDyEB4O$CSa#I7vtJ{pIL%V}c(z`1CcC>7Q)^6&bKcZVEJGK`j512-FP=
zb$A*pGfDpJrz&KH#vTy1(g>f4Cf`{DT0_`wr6jG3{<}@f(O{42pC7(?pt!_N>n-}%
zY|uW2@uR*3uhra19N;v(Ap|dzIksGNw*v?VFh*E&*~5(||7;VU8*Go};!z*Iz%Ykj
zS{e9zBhOQCV<abg;T=_t2wIQ&zcLYk<oegcSa`pZMc<)==J3CqfIsxWKVtcpHvmLW
zZ1Mj#(ev|SRJp?f{68`LKPrZ_nP24F(P6AIM@Ptis4k#h1;duFr=$}8tDiue&`A8*
z%Il>$4c^xiv1h*yz0(zdFwX+oA#X$%9dmb$)&!<MsrieNcWME;sO&)KgOT|}{^Mzo
zVMg->u@LH=S%4?l2c!$WTfGDMyG#BM*h-*+aD^&=P0Szlaq9f5H~+=RxMKmzG67Bh
z(qK!)|JQP|0u!5zF*eP3cLmV>=)qmZXgLOdxdXDKu<z9xQgQs}E+m**zC-d5zq=$k
z3$xaO3L|g-nxNP>AU%kr^uODKH%Qljt~jisDjNBpOC~_~s>$^Fzh(B$BYM5M`en1#
zM(y=^FytfU>d%Awj+euhLN1qynN>Hp!_$1lLb9hyf8JV(Zoe-z-%+b@M#?e*7M8<4
zl9gy56f%@~&H9<scukXsx7yg%%ZwdZ44TUlnbi*$Zl4yPS%6J~!t7;lgxqr^+E@;Q
z7Tp`eD%IrcYI#omX4>gbW+_<YUx)J9IQ4FGq`nraZ~<^LC^wHc-sJ56IX~JNbnnwn
z$<4Vs^BR@Fm46v0AW}%-G>FF}<t(}dT(+W{Tkwy?Io>p%oLxjXULIRMfw;;JCNeWc
zSs60AluRVdXl)f=2~YrHtsOY{dYNnbO4Ggv7dG>nX~O287@irv5^`Eke7N~l9{iKe
zjDVWzTiX%%p9KJkIwf8`hgoD=Uvx=*5r^d<hU}*`?*pKN{()EdQX_2nBmBE-Go|G*
zM%<Y~=nOJ|VO9s&Q5#&DDPP*KQdrtl;(buhp0+k$FgSf5j%QRLQNRDM>-Vn*hsDmB
zl9d1mRY3e1;3{f@f{=s~+_j?_sFHh6(x3Ec8+<S`Eno;6K;HPEt_UO|{7cWTw@id8
zgGEmy`oo?qJkK_5V2EH$?&ymA2Z0WD{s6`^Zr+1~0-6)ZH4@-Dg=46M==-8+5*!)X
z<m)o_;x73qde&bFu@TtI12-gNT;=V)yeUi0z08-k{{zCJUKI9fB>NGsx!GaAuAvk3
zUp9C<IgnDQ`+>zjdgA}{zWwSx{u79!CdMiUt(#k;k;nTl^0e77@3b)?xGwJTUG~Cc
zifOQ#T~2wOW99R|{(%-=1t_J1(ZAx<jp+}V7ejGf=-T-HjfedPD)PD4!7HX)z^m57
z9O?$8$=rJF;g`C-e*dojkG;1FsPc*4g#{_;5EN-8ln&`G=@98|q>=8D5=6Q;DcvEB
zv^1NRE-C4fu5Y&Z`=6V0bH3YiF5K4UeP?FPteW*a>xxP(4H69i1Y9)j|DnfyU0{Oc
zy_eaq@CQvpH8!))`E52|ojH1wFXRqgMEimft;jLoPO=<rMq*{ZugC(m3Ta%_Z3X{M
zpsx=Y(na?tiF@~^lfbXDQ;gg5pd7zhLvq3W$g4Mo&elpUvWfzZ3}!|*PhHQ7IE`<A
z)hm_T!L74|jq$gd-#-hcItIAe((YlM*t`f|NV#)EQJPN1uav$U4o)RKH4}ji82Pnd
zC)=l^Io^og+ZR_<dVWpO)N;?-LF+-}Gva8R^e+*<QFbWz%BC0RH#h|L!3SQ*2wzCJ
z8HrS_;&oC^dq+eisFn}jBAXp6-it&ZSLiZr&dot;xe_tkGn_U0h07`B;Mb$|pm3t>
zp^h@i6iXh2Dvug*cpNETQZt--`b@&{Tx+7$z_avdSq;_#Pt$Kb6n3q+P}9$8FKPwc
z9ahmpU;GEW$HqH18{xSy>eI6a{8Z9;T0Am<uqxYbx!tq;iQ%5rUvHBS2jt?iz@d}?
zExf5#x+3B-MOzYzv^XPaPiQH*uK|v)(&bFo+)K_wh0FI1d<AS%)0dU9)N=a_Myp+(
zL@7~8OFI`?P%~7b5Kmb@GBaXWF6IRBzb4_Adq6PxV+34<wD=sh+bD-m8*4?kdNP1R
zP-FyS_TF%y__o4;h<?F)34fBtq;@Tfq}Pj+<}`U-_8`P_K9YTpnP~O}u0A~AKUBsI
zu>#U~?&0On+R$dy619xi{7(D<=nRPy%P}o!2MfU&6>9EJ7WOoP>6bJbZnll6i{o{P
zQz34idy8Q{mt8?D=%AeJ^yfNqrKjzMePDJ)XO%*>o!@iHH}EVlzjc72;3|N6DEZ~X
zARk)sQ2Z#sP}OyA_0`24IR7rYT=w^mGDF~CSfu8i+>2Y2QRCu`lDOYh+0Xovt1Tr*
zqyw|H2ct>5%iWGmd(#^;(7jz;3&W#Gz=bkUx1c5j<-N=F){oJvYUFGHjEn}D3Y4cf
z!As%5!tG()ZxUB>Uy%o21yaZXuF25Vl#iZk16J9`hJal&o%Mr#G{*%w0`Vd9Th}!F
zSBemW6qc$jNC-91RBz7OZ$+<G_snb)t`Uz}_}EZ?f;izZJ#O$lL>-&iFV3ept`d>1
zr_~k-agKoK{=}m&`K6uB)1{o1Bv`1a9L*f2>3HO5+z=c|auT4U>~EFVD#F4G>4t)-
z)A^x!#)Q&3a&CK;JPlgLkBd5FQ(nWgn9M`8n07IX8wW!7>x3J^0gs1#L#yX?Tm8*q
zf_5eddI&R%LUVYD9M;KvN9P^(MrQ=N8X5YnH4L-alR6PHVDoqKv%!a80ErtKk<6PO
zwqT{lTQ}B9rd_)k+BsKn=R9nrU$$zZg7Awh({rdLR^0~x5Bv<sK1ap%t4WPyi9ZVX
zYg)<sP<CdjWeO*Ysa*u-dpCL%AY2sNKTBfZY%$}v@Gaj=({pR%O8)h&cSGS`2*fO|
z%f)5=&={mlj(2XMf(%v$RFwkRn@@A8-p?r*LBPWx<1e5&TWaM%mQ?<)9gdtqfT6a{
z$8zHqygaCRV~r#tdY45djC(v@P%4x*^^y)flq8Rasom9y4_r0o4n9#4$lvL5`eLI2
z+W;3IuM?)A`*|JE=o+Ir3JJ6i>l`<uDT(>0>yOtS7X{fD0wlSz=+Kw@xT9V2K|5iy
zhxxtg8If8Fr7JxErI<TFk<o6Bna29r`jDl@tmR8B>A@D#m=414qH5!P@aC1`%NBt1
z=Ko!FxjFl)(x`*6eto@5sl@NhI8vTR;M-X_`aY-pp2sF5x9OwzFIvu}QoXN?NT_Tn
z&Y0h!Scxu@6)f!k(dT`oU8!HGQ=(a?YI^fpp+RtVqU5L_I_P11%^5@GP%|((ZDBb&
zte0cZ-=I|LzG*y*J^=>;Z1MQrd&fu&7ucz&Z_kL)U=9lx&5{_G==Rm=PJ(K<RzH6#
zncV{65V;=;EE*Iccs^{<bTvU#Y2U%x8^d(&Hu!}G5=pUtn;(SIhT5*eY}LM0&^K0O
z-biP%2z|)9751o5BSamev}E_^k5M-|lZDm5E%v*>D)x9g4!s8cF%|pudV1=;=^BMD
z^m7eXV_BwDvORJ))Ya%}8P2{YJpySg8GRXsAx@CmoO)~ZFB>)rlinZvU*|A0*2)Q&
z*REk`nAPXPu~qd+>7|dsFDk!$n6p5eFz7X0#911>P`1E?Pp1;=xam)JRCj*1cGNvC
zu2gJ=GhJ_9>*&!CwmLwhUJdo)rO({ye`Mc{mm2E|>dUa}$(@r<SIh(-(JCN4hb&)z
z&{I&go-_R9b8&lO!RxcvgzmnLTz}aWq*P*H*R%<-p#wqmr$Yeghq)-guUn>nrWS7W
z*I~Is;lhX7nfl{bYB0MPxZPcNi%9{1BT#nAj44+c9^XgbeM#ea?skN<I4fKHno?Bo
z=_`#&v#27Qd?uroWXkZ;GMbaJg0*J|(PFp!mi5J2H6yrtqtf-yOjdU`tNJ{O4ElLU
zA9dham95=4rQpXu8f(`G)HE@<sg&DtVzAm|mb=lF=7W-d3Evcadopo&%MsloDNy@i
z@F}mv=|11@X9{(-h?i;XZM2xfVJhq$gK0PS20kDHxWoq)rV6l4j*SWr3e}mvgcET{
zrw8lC0lzEIP~9p9K?HVqB@cs|4+RsT2`)%Y)KoGF=u&1p-*D;&3x5{9{CJ63i;9(9
z)dZ!FDEEok97_Er6hEj(Wl|IupOu5!7m8?E9B?I}0xc)9ZTY!Uw<H|jBB@({8c%K0
zu2G~_7G*97trTJ3t`n=xaVTMyql-NXn$_$X$6+H0wy4=3k@hBHlfYG=)XdV*dZpQ*
z7|mjEohT8pVH3AgUl|yXQ}*UeE3>eQ`r6=;8A<yBC-1+&$<GevIWdmz{0*2yM9Fjh
z;x@7q&{=<{l8LY}*GR)tz(g5f*D><P%d4Sx6D^O@Wp9pR)8;hbqH+|sJ+f8sUHoV1
zR{zL}AAMM=<*we`ZacA_mY>X3wp7Z+%5yEyd1#VBQolVg>}V})kru0_VbG%Ck(qU)
z4%4=pRkYXaSz|jpMdeyg>LBXZVtTyd=ZNzBP+8{c(o$-<84l@oi`fPe)(<_esi1MS
zeCzGjpjuoG@=rk7NU#v;zHm8G=l~^KnX3b(#%hL^>C9VnYf{o(ke~^tk_s2)9}u{P
zAffnDOZRq_`_((f*CG+no4B8-^;W~b>?>~{He&T|wM>2@$Z)orqY}hO&f@DID2!e7
z;nF<9hpvpQ>>M<|IO?D=$~hsKq-A1H#GDAQLlVvD*Ya2_jy}h_(T}yD5zR$^g$u1*
z8=xiGtAD#Ld-+ZU$$EB|=atU%yy*V#Ort`w$F%uwDih@;nR_V5ck@SF+aeKF<yB_E
z_lk5GmM`%vp2maeqyT>*^7iSd4hc%jt{v8;@V3mS&t@t8B4J`<{g!Ea-dpS5yYQls
zN>AX-Q>Sgqts``h_BC#MjD|I?K_d3IrNA1u6OJTS&mfh|%I}Y9bz3J+LcCWf)ovjG
zZm*Ma)>CIoEtAq%8pEo>h^C%vr<&#YmhgO?xxm~l*yLL$t7|>~xu#3e^Yz=u0D!%O
zO@O*zXFwLNdNY4}&R|24MDryrwr&4&Bmqf^<isWZ$H3uzXW$`+ha&$|u`ED7Z<}0+
z|EBlV5#Vv45A1FlPLB4f(j^D`5=hwIQ~sB3b$hx3;!gU*ysyedx(vo_iOjBr(o2iS
zKX|E1;{NE(j=HHrD|*oo>pNIY1}VQbCz3Ppvd8r~(dCOS75uzgqtJDLkSwH=3wrW(
z6&g+pZM`nZgt7!yjmF9Je6DaXnOvWbPtbVNYyHTs$E#-kh(JavpPJS&ONqv+KY6z=
zrNpk+zfe*La8QhIH`eZ{1Y+?B8b9)-y2X1SiZuBXB>(K9Oa<?yQ~=Xn7Ncg1e-#6l
zpHN_xC^D8*y&?Wub@vsLz*9gkWB|L8lg;=zKgjvws(&D`Mfw&#^N?eVR2@Z2>^#ey
zCz!BMyJlxxHb~1L!K|1z*?P~)d`-BY(q~XRi`K}!|ARA((&e%`lU5ebKZRylHq6*n
z!6SD%u*!#^M1|CL(%H&F<z@haa99{<i7>$ns<)7D1P-w@4mT_cYy;Cd8cIufYzKdx
zu+GzxAG8+D$8pZ<(sj>dX~^WLu52kaSw-nH=|!J{)CVyO@4XKOI-LMFS09H90j9}O
ze1$UFr>CpTOgb(xSdFI0;6D53MGLvY++C*qjFX=^E*`zfFT#w6yr*Mmr*gmdIpRF<
zxH3YzwhENrdq)x59RZ2#?Z_>XPA<O|Ka%>@E4Eg@R#LeBEvjm@=k}{AX;sQN_mvB2
zY?R(L<!*x>Qx0uhWSA<PpY6iAE;Nms2J8ERXkh4;rv)gfY+7K3?hQFthayw1m0K)p
z%5bG5ta`b2hF@RP_-$x)U(HC4um+mk#<m!b6*83iBWV4!s^S~-j@fQ}oe)~xk4R)2
zeR{fEZV$P!TH~q;WgPx5fLDc?)rq*ZCh!lx0>PXKxpo)owMd(G-<rEQ6}h%`xxL|r
z?c_S&BF^Gyvjs2NzM%Y<Lei1i4X#qSm?SC`sn`{7&eHSkzXIfiV_KDsf=d*{%^^Em
zM%YQ-wQ>GdU7}V|DKA_&`|4+(t!kc>liKf>Gl>G1E(J3g(Wu+#)!-6>-Js?^k%avS
zRuc{*X8~js3uP1_=kU&AybiKOF}e%rSK`23V1WVPq57^=Whw_l*8%^n)A~1n$mT1T
zYESQ00}ee60ODP8F9Fc#(MFg3$8v6fZ{`S~bkCh^0E1380DS%2FGedA6hob2@Vfk?
zRH*z4hP-@j?auA)8y_nK_#sz+{Rr#LdZkth;R8Izy`)dVkYRXSQz)b$S2a(bq#n?d
z%;hE}Ccab<xX9kFzPs7?Q7TriP#42tcR&rYbN)R{(Hp~%?s?ib`IgNV^K~<9o!d`Z
zh;z9IQO5?zD!wYnb9J=RU?x%5(~k_0qHgVH-zHZm%aaOHP0l%1x8%9<+00jlvrZoA
zZr{p2kxHld&V$sx07<`77RMTBM}c!%T`>k1h|l|-@4eFJpL~I_S!(+jE!g7IV%Zfz
z#>5Gjh>iG1OF*jvc!LKV02H}7Rq+*_h(i{}ELNKdn99440M@rrm)HGIJ|GV3-%SJz
zx$@qCa^<nyMTFpOWKr4a{@JKy*>`H}UBD?q17k+2X#s(20e28B>ho#1fF)l*VKhXw
z4kH;*s@+=~CzVI{{|e1XnRAm%ejNppN0ct=V&7W;^MYRp#>d}b{1DuaFLQ@>K?{`6
z&)EA)l3)ZqX?$+lWeggXViuF8N^U+H$VN=FSFnY=y>`>6Fbo{HH1HD&$%_)ohLN#t
z(ybbJ&ZpO_luG(Y0S>*SJXNmTod`SUKL;N!4#z&&Z^WdkOclJ7;fu--#ars&%?To7
z?+|$QLanBdW~~@vIfMN?nR*c40B3M`V}az#WN!#9R#D==tdlyp-598{a|R?!{h)h<
zcE14Nbugc<%$el~QC>#>==nP{coiT7KPOpxbd$LB94vQCc;?C`J%?eXSL%N>*wr}i
zY9=HY0}hEbKp0{_fI<U^r*Jr06i~Z#Yg`D>m(i{F=9=H7`7D{3D~|D}*DS`Y_)%iN
z0V=2+h4-mWs(^#0=7LkT`zUUR{^QRT=N1pMe&VbC=mRN;%N^8<uF+gMOOh1(L%QL5
zAxcqU-|^SL8<0x(x(|i}!hBmm#g=X8<3nvo9!Nh#Q-vKP+pH7g;o>6P#nEf9PA|SY
z_&il*7V!+ryXu@ZV2uJn%46ta`va0KgKN^}qVs{hJLsb`-*5!z1~R5AjDpFRIofH)
z0PNp5B)6OJQnSIaVA8y`kp?(LQozGpr_g@vpxx|Qk9sps;rt$*)Z(-`wS4R$kHh+B
za=xLx4|$7%AT?+V_-=YN$>*Ms>Vn)Tsw?jm9fy9&2#xFXAR`1&ILHACpzRM<i4nCo
zdycH?$kz#T?EZ(l3*WWe95Q`;FqS(mj@DuTClnQ^HjD7OIxVo%)2>aU2Q{L6<IH3}
zXK(_@fJh^0XQLHhoF~|~j8hFwm}Vy#)Z3@q9tLL%pU|xxk`A3W-nbG!%7&4&6$A;G
zklfWfHhVH3Fuc-idf)eKNdqK*+BL)&(U9+Ne6Wq-Z$W6_@43d(jv?4StM7Ka?sV)d
z%=LmVL%(NY0`Qle*sht_1WKK5j!9sfB7F>T1Qb$zfOU=ua7ahY4zhJpt)3?f`ZR}$
z0A_H6ELD(?nd>1I@Kh(c0=TvQ!<FtFAwT#d(3hmp)3jNHokfD5!1X}6Sp97^8Ws%-
z!}^u;GWv2!PbOl-%RdDeM%SRz1%g}EH-O#gLSlKrT@`HcytvT)WUF+HnKPyXkiYA+
zN4D&g3jl(&x7LR93xIPSfAkWt3HAd9<Xu!Iahp2c{ib&%RGmD<MD0$?Yc+rz%WbcI
zWBjh8JgtX8Enku1<SgJ3o;G(SkK=E0K*t_siD}z(7(};7d}#xa`g4FJnU*QlR5Mcq
zkt$x_6Xn%Mp$owEC3AOsV*-8^mH=j-9){YvtkWP&-YPUR&>|$^Xs!RM6EwI;?+1;M
z)_F_;yNm@%1PO00A2!8Y>t6GfO&lKf#TtN{ttD>&BJ3;?5*FtW2#gM@0ZF!FMzna;
z1Am_a+!f=N!dZhd(geUpEp+pV*^s$=3Y+$m;(}BRb%Bp%$4SVs$DiL6*h$#W&jF0S
zaRK>-wi%#%iU!K;=ZU*?U=^UjmOR^=SK*x^!Y-=Vb0;HzO-PY(5LC7Q(JamF4fz&e
zmQ^N+sHc#9Y*?;?a06Jp)&L7UPB9t!qype<LoAkKVU9nm^cq6~(Z2w{&!xZT-SV29
z^Rq`{tYBwFHSv&kovxL8v$||rZTC~?#fC#VPQ>;1o#~VvD)9?1umPjlyL(G)m>>ku
znILcbvI0IRMHdBahP-KMo!Auk(SXTh6)>eAqCR;ZdV1uaKivHNY~Q%44Dpd`9l6vk
zAZ+qtgya#M9Y~IGr`q8~%$e0?PX0o>y#+Wwl}3P^y%7C2+8&<wWdoW@2k3uQZ2=sR
zp~jU*!6TfC22!H?U9(WNWXnPX%}0J}+4I{&(Kh4|2WAw5mx+qs^7oN{U^4^8r&Qx-
z*x^9$=L9IS<q^Arbor<g-{r6fz8DwtDYZ=-8m1LFw(z$@<DN5E3xM8uh7??jL1Hr)
zJ(f_w@ekswr?TG?Q8V`ecwebKoFJ2YPRJ@CPyO)M51sB`P6}6W&$mE@Zdn6B^B4WH
zLM>T$g%RDU3I2XU(aHQ-3r*r2)>OkKurhh0<#MY?T6ue>x=4ok^tGNVlgLtFjxyKB
z^5ByrlXELTrup$Pz51|`jqp|dxojKhKr(xgJK8VGP6-91w?xlN7nmxV5_XTV;sF}m
z_@Z)+b+?f_nmguBH$vb^s%^AT2rN^GGo{ErdsTh4_Wl=6+1_^M7U$?JzZ2iv(QG+^
z*RP3>TN8xZ+l-=l_2h8`Unqi#1tXKGd%{y83%mn0z?ZZ7Y8*X{3DE!J0f>4#8qQ@N
z;=_>l2djRGCnu8$MGec;M9-hL7fB~D?&4A`g@4SxJt!aXfAl3#($9{Wax7Rs1H*8B
zb1W}T0<>-lU{jvy4CiUmCG|WIUi%Ti+yfxW1@8`PPt(`{svA$!u0tJ0#unE@_A7$m
zfSfebIw0Xvi@5dAn`0_S+X~hM#^6>)A{*@o9s{#b3~D-N@n3F@c1RSDNR6F8l3&7@
zramPAW`sQ4C#fiAC?f&EIGxnHa)C7z9BnD69*7HeGY7T@h{)e-L2C$6DkSdS52o(h
z(}<b2%@<uIqm-u<wCU}jO2o&g88B2SU1>%jUp<^e<6|2fHYjF4B^Kk_#s9<+S%j{u
znQLdd5zeANWSFY|9<80N6n{t56=iF(BG{mA-SK_dtM|w<Tpxc2o_v%@ZO$EG3NUlc
zo({A3MLWeitt7NB4l`Ps895f><svAsN5UljjZ0U7sSg*n_T5VFP9LBPT(;Lsj&6_B
zrGK!3SSCh$)zc3_DGB5=lyx8E&AWK!SFbWG(LZVXHQm1!VIGz(GFUmf_0tmKOgOqC
z@rQ`!B5XY-K2=G#kJxQ=>6gbYgItc-MV3F=TKPIjY5Z2W6<NEg_(|LTmq<Jbg+3L|
z@q~s!MxP4gu>(=_a}Xr|&AMIsAR>F72rtto(zVvypYi8$z$z!z9*REok6cyenr(C`
z(^8)Z!FdrYLD<QSB6L9Q-?oCZz+6`m+*X93%Y!XJj!F@nc#0&#>f^eap&mo<YAMwp
zTW)H+;GN1;1_vn0M&J)&IM@<icscjyg%~F1DfXvtW?u-P1NC36$EwiNj7>x6Iw?O>
zJOu~}937K)Y{L#6PunmSROP3V3?FudUx*Yxutj+WN{xka;0Z2+^s}vpGlZqbMNp^;
zc`cTQQ-af(BG?#|ECkB#{;LH5(OHn9=B`@~KLQKpwN@~ONO^!u5-7OUeQWxjp^HVo
z)t52>uALm93wPY-A46Lo(CY)Y+ujoEIb9Rh;I($HdhoCUgTDymO~_XOlH;TnV<hSO
zUBczNdH9L{BW)c3eq*n`@wmG=n>ZgvDrE<-b+%$&F|DWVuGOumZ;41ffF(@(GR4H~
zx+<IzAnf~4FhKW(BY|lpBIg(#&edaz6xi`5Vo7YDUYNgHZDz#kcM4B^vLtEmcYuhC
zLIG4WMF0`tyGESLw&XVpgrm=V{~P%QB0l}ZlMcFhN~7FWnJ{FJorM{+AiElnIjnT<
zMXF!<Ku6zkbTHbk5v3Hk8joz{M&e8w`BqQcTggp@PKL{p)Dw7hWjYOS*KaBn-T7uG
z%K9L{ovU7%y61ouvcN?FN~#vQ=^Nm;N^}5<Zr+}CnSE8*ADJkCHudu@U`TmOs?9tn
zmV%E@1G>WDLepG-J#r%MgtK;r>)aVju|qPD365_M+bzgTQ_^KUu=5M~Fvy-j=5Dc!
z?pEfQ__C9IRgEp?NYr(dBBEzH;1mE^Fx#s35h=YU7kHc_*le%fJRuVL<w3I(X*uwi
zh+Mn1Ul#B(e4NZ+Ow$jI%Y9&oN{uZ|?#q411++#I#kZ#~+TXLdJPmk9q7>Fbf!o&6
zk3JDl7ee_b4c)&S+8cBh)sHSxNa+5_3#p=SpbaEYd-_e?C52y(U0g)QU|h_WFIFd|
zIbc(rRAAE_)&ka%j?9rDK&FF;S01s0v|l`H(<m4CA^D2^X?qfB)!rDowRoKgU>BV~
z_4^*mzy}>@vYfgCP~~{Q7b(a7=|G#UC0#?90G>qbTz#Sc^aNljo%Qd8&2?LH`0TWp
zY{^(WQxTr~gVP?Oj&z4qVLhqp_Iu4ETib02-;J-j(ri|MkZsBAm1<4M&fGUGH)oa7
zZSEF-pRXu;Z~Xx67zfdxpIG)z+aSFwOvTQ{09jsiX|YQ&?*G880JjrS5QZQ^L?7Fp
zWU0-mxnkk!rrrFxUBf=${Jsc5h#niLewk7(A?W}%z4}^mbaF8?!$eM7GF~6o0`hT;
zW%HSX$gyv$g%%<RV)8GQ&jKiR(5%~wP#3s+QUM7W_GPX(mO>ndoC0A0;qzcM=Fo>i
zJ;Hm8Pwj=;t}rRQ4pc|WcnyN>B;Q$qr>3Qp8=;yxFLyouyo8f~L6J>BDqjv-w9sWz
ztG5%L8h^s2x^6(PwVusw`X#$OkpAm|=?XRhg*)IkIM|$&#l)T^e`&#n_zI;Qu!$-2
zMYFly8kFYRt(&KVSbkXG^mW&EFZmmz-M2drkg()-Kjk_!ByCXq8GVfJ6otx`J;4}|
zIsEbT{V^qEbJ9@|W#kQ%%lh{*;5U`!u+6L=GIV2_J@aA)=IKuSh#=uC?TWI#r1wmU
zTm?kKGAh?lo;%A6c(?r&&KyaWp*0A9HOF?|MWw~^Gb(b7JzkMV?>Wz`cJo(sv`#fi
zb+%1b`WT|AJP7Dkt3Hpn<ks}PLh9HKGi8kX=z%5E8~P)s2|q3WNBznp8siJHyPg^+
z)ucac8`-Y)SxN=Y-0YZ%I$XTvSCRCPX7$rOF7o=Lb;acz_Gc=G$#hmXM{=yY-5U0b
zGA>fN^n@xY=WlhJ-w;lh8&<ux^obj<8|5K8wM{gQ8VxiC$#n{R=<qy9tr_1zKibb4
zk;JThH@gx@61-sA{G-@t9I*y>pDm-rE`il)3e%4mXl&6fU>6TOe_+Z;MI0O{hw|!R
z<|tN~H10Wt?q-U&Yj5juXAH#vTBdE!s~phGDHl1JaAsq01zp!sQWtaoIol?W2=nrq
z@e`=5@Ix;H`58R46=wc$#Wfriv5ypx=^_hvLvVU~D>|BFE8u#s2Mr&B{fyEr9w_^c
zOLR>pAO^6(ko`o=?eIOcRme;RITm}W@NSkVGXP<EkB#>ZaBcRqHM8<V$w@^#*~BTq
zPbHQTh8ECO8Gc5g2x={g*%WPJ73X#!#a@iW8<;aY%|hi`0NDPs3cYe9lqWw)BOAyD
z5GSQU_%$&_Zrn!R`Mf*XeCkPVZ9<wEc{^c%nZwNvX;R&u9mysn2zz``QPaWmK)J(-
zwBJL+Lv_my`5hOmd;}uX1aS_210Myu_@*rTVr^CR1RBW{q>&R52H=gUkhblDyM9sa
z`oY|ANDji+#`M~za{Jqjz_lWh09Q;epX}HYRn0yXI;sPGEszm3AFYWRZ)ixy9n3{f
zr!Xmsk5-D4?pdGEG6C7o(MxFjRyCR6m6SX&@2lz(qblPet55HX)>`*Ij>0mq4nJjW
z+6i^L55WypWz#p>B0gi8_2Nj)@FJ-gjy;pblf(^$3mNh>Ad!QwP4Z*UDu+<}n_QQ(
z9NiBppH>gyuDtQ$77pJ{Fc=eMgfOJENUzIllJloEs#I<?TnGG2@uOSM>Q0`UIr94N
z;`R+Z06+rv=l3d4hxSE$hq*}Pjj5rN?Sx`4UCG-bUCA<&utl)55X+bt6ovTO<emp$
zCF;0-EYCgm0K}MbpIivpbiM76$9_G%wSIC2D*F!*M3mge>_k5y>;T(&Q8ir4@orc|
z<qaD#_H&%hEW1Z$xsq|jjdq_fP;x|3Xk<6YI$EM8F?Ai*XFN~=c;}bhmsiS3b2x<;
zpm9v00C{}d!@jhYflu8@Ne%;Zc+Nw#4t%EhMLMgZU)WrkwxqvdYgY(TXtza@Spf>b
ztwrp0?9n8^876W7@@gtt*mLDRZBIQa)XMJK4_^!Jc#Pp;iW5c#=*@!Aw8Z{_h3FyX
z>I|9~*TZ1IiP(S|DWv}s-^u9<TL8v3<U?+L=m#NJ2^RFHB6c_eM3~F;)u)y9$cOu&
z;=c|P50fNrB)##;yNa{k7Af+7jNf*{cT812@f0g9Q6hn8mrewHrMX`cU+p&3R*IGU
zk{(We{YTB*Dv<rBjnaG>2S?cgbxh0MAVcSIy13;8vT2+<eq4UMfT;WC%oX*CD4fUy
zma5@(G=6A-cdq7wzRI?WV%qzyu#3ghhCf^6h+%6kcu;EeG+GY^xXp#T`Q<(|B$h|N
zrf--QsR-Hb&N`H_Zu4yO@+%zcu37@3&)<HzSc9g^n0`sw#zn=+t+#KppfdKlxjdtI
z0$noU1UEqS%QXaj51|QfxsEM)kM2q6{a(@lP{2T%Evj1|x$|HWTX)<rQT{jAkDVtx
zuhH9|wqGpyJ+x(5MH_px_ErbG?1f%J4UFM<P55I7;t-xSPhKn`DT*+>f!Rx*2W9MI
zV_o1ZQ?8;^jOX&(F5#O2M$8^^J^W6gr=8@hb|QfauKT}H{hp$UXtEGAN`w>YS}ZDI
z8<8iU7Zz?>6ZwC|b6&<K0zs$kTCZ^_jlCZrHFROMo_RgFy}$>>UPqxobNjYbkX<D9
z+!4RN1G^@)Tsjp?#-kcUqio|7c!DU72YlaxAE(5rd`LUZ?r7OnVUI!~^QRP{kc*H;
z>Bn{{S9AREaA#A3%u9LA=;QnBGPoUa6m9tcHtlx@<6>7_dTO(s0-_^u0X`y$Y{wzy
zLo6PNlrs$=b)am3;_8`%FSs0a0or=J^z=h9<7xppoWhKU>t$%}OHSpj)nU(Z57ftl
z3^At4Ty>scFqc)p{EM4sAfyrby_ed!wqctqaZyVYOtuXQoWw^^Ba)83eoo`iMkP2>
z;QEMN?R6t>K9`$WD35^k8s+W5?}n8}bWzP@&Reco=y#3~$~4K?b}BSY6}m8~PpINK
ziihy_dpmA~2YuM-qz2fdHJb3Fr3@aciEH13_$*PTcaF-bAo!dCY~T*c%nONDfvXC4
zZTaSr1!nDOv&PYr-W20bz3-Q)sA$$Rd(x90np~nk2K;O>8;MKJ13j{mO*i|f*0zX<
z@;?N#b`v5w{268`V(xg!UX!e*aw3(|o<x=s6?|V}CdTgv;-&}73IYrakTzZf?z9~W
zBWlGtgctRKA`_P<+j+Rh9&7=Xo5lzE0p>xL?!4Uzc1WP;C;axfRKlCqbsDf~3=4?y
zDbs?!Dyx2HBm;|u$y}-%b<&^gf{s(m(Q>9THX<zYbM;rllR!%qY|(0!u_eNfUI+$#
z=$YSY^Y{xuFCa0Zo-iaFOrwe2ZDJ?j9XsrDqR31+<gdPe0_7XTxM=>P9X-pb?xFn<
z+W^q~?dV$nCTb^X*B&g4N$WBmGBG{lId;ShkU&{#u&~N#Ps$T$CD_hDV@~`*%|}ue
z7Ps^rCeeCeWjaJ7F}68-Y(+ONuK89{C_w?qUr;#LZ=c`iwkD(df*OqDy=Z1A4#d|`
zlwJr!l@|UWTiy^Y&Q6U4uXJ<!9aY;O_Rm#6xl?W{M3^V{BISx|@<l1S+QLxX#-8cu
z4fVOH(n;ZA4+eR?)`XQ}DaFZ1Hv79pn#$fnYF>V_a~>QFU+aI;5~qD(PrJ|6I8j$w
zxH{laZy!~Irh3KIP1;ugIGdW<R<>n-5t`4OO5^8FA2xF+wH`U51>5CLW{7uFD^+`o
zr_;$T53~igir!xWWW|F^fC1f4Sj&+IT!$tH%Dx%C%&)CcSVTv?U87qE=49(A-h||C
znq&`&?&X2I3$afG%*u9R?`e*Nrp0kX9`+E$3wXJ!wH#Q8JWa)1=_KNJ|D(vc1ZbWr
zcrbNs>TrU)vH8sO4~m8HWl&gotT4+u`jIYbmxPh`S=9l*pD|z(D;0>kder{3P&ye2
z4qKOdrUP@o%>lU=dc#6eXnM9)wLyOTGak=5cS>KiTWl6aNV>Lj1RzD*g)s>!3qHAa
zA;xD1^sfWo1^gEA^e;#FD1mc|jBYLRU^I^U@L2#Giu7!)Z30Nqby+Vid+m@EGw_Y+
z!x+HAzMlZj*1}^JGgm(f1i}RkP3Crwg8&gd5a2SsRAy&CBkxpml}hG5AkS)V#G#1F
zHMlcm+KMz|qaXhBhjobMIA+=DGZ{WZg>6M`wM&W{W~l-suV`xP3?%onIxQ47S^*n}
zr7676SlBozffZ_cohnYKc@WOZj2H`|jwHO1uM2T0F9|es5-@DP0_1;bO@PAFrnsC#
zokj=aiEY=Bu5(J$i3D(owQ2Hy@oaR9pW2*rJ`#HjVp5!H;$~#F_v31;PPao_G6LM7
zZBbKy!)Ze=Zs;xDqwpp@*v;=jSHo7&-Q3Ub)wNz7jKV*<ktTJa>~_o7i1@dUhAPrj
zMZ8hesEW&FNoXHcSnW*4WWP^pr+ti9-#Ihqlp0%LU#~enlS8yt`?e~^9ilNIfOF~=
zh`;AH`7TwbWBR*OnN>AHqkBAa0>z4tpyCPK0T-RITe}-7xbd%nZtbfT39h~u)jCrb
z$7O`HJSA90zX`vwh<5bO2bb)JdIjzZpk)$O?bnO{6%SEE5Lr1x<m;xpFa*6{Bt7@*
z-q)6i6i#Dy9awrp{Oe<B-+Mt)UA2Q&;hwLI2>Phn>xTzc{xsMkEs+CrO}$vg-o2?|
zl$Rin9R+$?&<=49{E(GSpS#F(UwqW47~ZzP*bYyjDExj*SG@T$=soHog4@oH2im^D
zcZ#JfMKOC3+n4L_QO$UU$WGx)AECIJ-tt%ZgrqF_ZX=1KGkg2Ex+XtFPz~VwD!1dN
zV}S1?dj9amQ+r&I*wJ<fCuz?ZvLBuy&ncUT54ikz!Ylx2*%CtzG*@#)OW9T%*wE;v
zctx9q2Ob^=Y!KNKAsNZ~$<nf)*#i+X3&}TKe4P)R6Ph~E#zjE{GS19+mc?;H0Hv=j
z-Dgl>N_-_!@TB-G$eVfe@`ID^C&}KM^3g{pWmYPBDji~VH1=t2Z0P=pP+=s4BF_`n
zO4CDkVI|L|VA=%iFLpL-JZmO*r4Lz+=J?hQf7Z;ND9zAPGq<BY9MpTA-j`Y9xFDUx
z>6STetvj|=)D$=1(=t$d6rI_STbq3$-8tJN;jZa6@-2zf?4bM#+_i5p>?Y-k$~xs!
z2egS-#cS@XCH%Tmz52#P(d9IAq}{<~aN07-I89EZY7=kC%b_BUt*_7GH9+jPG_gl2
zE?9*H(kcz_r|z*+LSi^5fp!FM@mBRE$v-3TJu)-c5h!dpqJ)S~AUuCx4zAkYvv9?h
z!|FLW#{FDY0eSfMz9xLQbIUGV4KA-ws9FJL>h?$x46ZijqUSVxO_2`h&&LN29aO62
zpSHg$(z@t$JcM&iXzNEuil{IpDOJr(Mlout9U@_Elin)>%Cl%N{W5_*rM7S43178M
z(~V}CiqPA9{amUp{sU>w-pE%Uc@)$p#2DG0HZ5RVpdMnl%{W%Sb)9QFw&_o{Rvigg
zT)ku*%*mp{bgvw_p($k9-by`fd2(H&PB5rVJD<Kw%WbcR`6@6!Wca7ekT#U*SO2M1
zb)kiP73h~>R}cU4zYbxBQ6L+kuB`%nRSgo$$LV}9ECrPdWnk4R%HfSVtjs7>bwUrY
zXTNF-d}bKO*%53w<7hrH+l?pRQ}KdEx%nHg5_N^TTMi28JYVfUt$l0i*a;#wJ7;pa
zh0uD!d!_MtOIL0Tm?8qOJV`e-F&}MGm8nvib+fETtj=V8l5%C!jFQEe#2upwk+M>|
zCNsLj_0K*QwT-)-RAg6WmCG(c^2{?`5hj1fSE&BJFU6HXffuq<Ns-NmiJP)UXBQ`z
z-O}&A(r-f%vCT)LXeA|tQ1~MQq94HB?R!My*#;#UQ?teNrd~jg&m~#U(meJ&OrfjS
zdskj$g&w+gQl_P0b2FsARX-k>iQ+@nsN$MSQS;#oeQB2r5g$Y9U5^}p2g<{N4u%S!
z{MqiR`?e(!&D>sPmq~nuM}=y?F3LnQ>)$AiI<d35umBZivD<ru>c8;~1vrDmzqsnZ
zPji!NnwNbp!ryXdU!m=BHnH)b?Rw!l7esgh?Yy?PV@|kw*K>1LX*aB7GrtJ?;h?1h
z`&1`<Dnj^*h{|v(8N+lLOU1(qQJBNxs8;h06RYY*MroBzO67IGe~Jhw6Fe*r#7i_Q
zc@sX&M~`(Oq%)l9&-O$%8k1!e^(7ZtefIJ;^Y{|I=L?3Zw9P%k(%0)sRpKb{grIsp
z?=bjHRHQJQYB1sP{->u366XF{2)D1nRP(dD|1LoWYMl<4x=p$;NegWTYG(tOvf&L0
zcFI}V(t!AJ>C(WNam5VHjwz@w7!hIl8t`GE(D(8g#J@^}#hM4@ehz-jU<UL9@Rny_
zQ8Q2jG{6%L2-F*yV!;6EV#j`YDZ&;~0}fNAO~coJbNTPr^5ILBh1!zn^5l?9VDb>E
z0q_1Zh#dxg$YG?I4ty9Raw#L4!J*~`7s2EO2CD=fT?*kK`3C-y5Bokp_SoO=|M~a*
zasJu2#QM~s59cy}z`@7LW2wQ`SP0)P>aQ?0c$EK+x#?xb7d9`g&aA)xy8Zuqc;jMR
z(D?Qb9AYG9aI|^<GfXMinkF-l6#X6M25rWY>>_?ezAq5mU<3G&Q}q><n`?fuWJT3F
z?fYP8yh8;xNZ5WjdzrB`!0(XrLIpPP))d$zy>R{y7G)Z&#~7z5J$UJWcomSKc+6V2
zVgxl=j6HsT@(X;(1w|rh*1r?r4*FWc8m$2y2dA6wzcK!93T#VM2)~CatV7TlKQE7Q
zZL{GIR1Gczlb*}5FUo#)V1)F<g+T=hK+L!tBJ=XDUWJgGlj1*h4Mh-cuQ%~fk^J@D
zhsG!EPL*~uted>W39rp3m_@#h^%vgDF&{otO@9Cl&tx~*NeSeFhFwbe|NU2h3Jly~
zK3@|zchGGq;!J{QMiHaLiwZl8?JsTe8&&NHEW}^TF8*|yHCaEeS3*#jFZx+b@0elo
zAS0DKm4d22Hdc$2lwcYTaBcltRX=RHg9#6|wQ#jt8gOlo@nL8t^{;3Oh3fDVS&V|_
z&X)K`_j7eA8e{#dBBcsQ0^h<m@ng<$9Xc+w>a3`J+Z`XlgpR9-#K^e1b3^;BRr61D
zq%;Y7v!e5+QR$>4e*c~qg0Eit{_E?)5!nT|t9k6R;`xI6IR|L@jnqa&TVHX?Y?ls-
zI=}kgPi&D)$IOA6A@UC%O%?LH3Jb*gL#)X+-gZ3j|2BvUc_(T%sP$gaOL`7mlBh7o
zhL@e+EnWT{s%0g8K!R_-ne%<M;R;FmLV8Wanec!6Dl`cnvKB7kXsn#Xwpe-{5qFHe
zT$`$P7Iq%g`&aofzbtnel$V;TQ}|hY$%6jfGU30MC}vn+aYPR{<-WGj63m_lYg7Q4
z(MWu(Xh%S8&@octUuBp7>_T9ZC*|bAlRM5^npUNVoRcrxrmANs^vgfZM-D+9xbciq
zeyO1W8Xu8C``<7Adp?EY66gybWt8_WXSP+I;x~iY6I7@OV}bKiB%rbbg~#oj3YO@#
z``$F`t@Fr8NyiCHo+;OAtmcVI!1Mxm3brZY=$E|yLJh&*&2Q)zi$u2OQ>KY0KllD_
ziJk~}^r5EXQeMhtWyrVvc_rygUyb|}|0+eg!ka)*Ao^uRc3Cf4F0`2PzznShX#9`{
zL&qMGHp-i?qJOnEYznAv{7(8g`?GB0dl^8lf`?_KFH1D5qruf^QWp2jU$q0ir0NHx
z)CZX3O+Zx1>a?ZocK7it=PK?=uiGwBur)c@*3>1=#3HbXDGu1Vs7$p^(|I5VT*X)W
zsE{wBYOC8+IWjg<EpBXo`nNoYUmu_$T{qd}?c7}W5(~JBVbYMiN<`aM`S(uf+YI|S
zX{8)@lY)u-hSydaOxb}CntDrs77DOY1WK34^p)l3Kpe=#=5U)h>XxFBxZhWgQ|T}x
z2YkqnzJP77|2e0B-}q1M)+7>rgDEf2yZ(t>JSKY<&@^HB*X|$w+nN95+R6mxazPbC
z`q%*cLEIG{zP`!7N@#r4$OvW{D)_G4_y5i0-)}Z=!JxEEX=}O*U}gtqdt!1#!(kf!
zIc<xs_PwS2_jn=v0~zob@7`==adq$qoHSn{#<C@P?CL*L>Z(F*8uUZW3yt^35oR=C
z`#}rr%=P%B(A^KTz^+{XH5QwSFA9DAt3vw9p8PWs|0CIJB`_X6)=gm?*z*Et$Rqo~
z78hgS=ZMD#TfD!j?ChTp@o&2=5rCQ2o8%gLKw!4y?yo`N84m$Um_F-h`^SHZ=szvr
z0vk_)th-_W?0L(NP*#snq3~~IBEn6;gMJ+CY=8e}^8W5-VEM$sOl^`1rbzQ(Mx^Ec
ziQxY#5wz3YfpTCfs?B`mLRE^UC-#55PX{!jo<9eM8{;yW_^zxa^f4WQr(QkA`kw-H
zDZwblHQIO#_klE$rb-u~o~v<86+eCay0V{>ZO)k+|MpgAA~;}%R_!|Ojj#>-;eU3m
z;?vgY(0WVbk<Vn=8na!aF6w0=TR2-`Wm*kyIg01#CqZ>!v2MBiy3cp9(1ehL**oVI
zZhgxYP;}*Rt<4s2l}2d2q~<*{j%6z8dC0XXnC0RvKlA}Q_bO~PVX$#ES5PN?zd6^3
zN9%NNj;Z$6#Py57PSd1~;ycF1b=-^Jfrs|T>7ZOIu5ay1b3vYxt7t4!RPt&ZI*s1?
z>AzY4Wk3DLlG+s_k0FR(Gt!D+=Um7NcHlTR=B-^yfDU2H{U1ewgKTEU=JltYON!fb
z{I^~A3a8Yk=pQc%rSciQX>Vei?&k9Br>pt1KlCG2TC{0u9GobP-7L)_ONxBe<Yqis
z3uZI!?$TLz89{<N-!-Yd5#(UK@}8u>lxhZE|9G*Q4${KU#nw)ZbRVWX`83Q&2M(x3
zOZ3Y7?{{gGfto~%i=K)2tBWdffX<mpwyj+#fgNPX{~TnoXONLkuMY>As4_oTPk*<L
z_&{3ydzd~eIbiEo(Rx`N{^H%sT7K5Kx_-z~@rn3`%#fc>6;qt|uk5Cq)$VCQ2{taR
z7Ee6#Y0m=HM%o9z@TW5z?@B+i-k!W-{pFXbl=CJ>QSKucN>c31+Yp$geoj$M^jCEo
z$^zxW!m-TwT?f&OF_{bYK~G{Kh9B~pD}~K1vskM^*alJC{cc(TLx_m)GSv4qg`?*6
zuKmM>M48V^*8LHJGDXlqIiG>C>aDBlN|RmZPhm{d;Y+tX4m_gLv9uJ{4UqlGYS(bY
zMfx}$Exa7V3Dn&E^as8(4}95qTAC*X1TH7J%;yR-wdY!@+~)nOOn0C4f5B*3MVb6&
zknCxFyk+gp3w5<ccbofiJN4{!8O^8JGwI$@s>)QYfS3T4*=19@cQvt;uM4cZbTXyg
z9OQqUdR5{qXKQ6DY})7Gu+B>IoLDf4M@<d{0qD-c8;;^B`hwo>Z26AlQA<Tm2MnKQ
zqU<FrR%2TwN4s_PO4X(&D-Hc4$aBH_f_DY4t>+~hFzSZWOK*7I+*<}j;LTUTKh|Q(
z`MdERzXBUCl$tHnR=h1Kj9Lx$Art)UE#mV>P}T=!3+J=Q)AfU<pHqx7a2yQcN))@8
zbWpwscx|F_$J=YqxY#ljNPHn|?J%9UJkDi|CVNuG8TpyQ@0wE-UQ2s;JmBs6glqVR
zGTC_1D@>Uyo01#^-w|A2o|?3WOz}UTRCDEp#hBBqmCy*p3rwEG9?x*Hlxf*H|G-G-
z<uq!J5q=Qh1a~_1G*^mCtyYTU!r+v)rH!!d=M2?026?ecOHi>!9du4B<|hUQmke(_
z^*$fJss*KTOuVm`87s8Ma5bh#F~=Xh(Khl?RJCuJR1p&O6T*{C?=|t!y<e(GkcvB{
z2R0>4R+y;@Pf<3?oSQh&pzatAQ*)dfzT9Kdaj)FI*ywrvDIuPVag-PM*0lbzb`l>P
z+q5(yOknQ<MOAj+!EC(WZ#7_V{|tS2JqHVN2ebay>A;1I;J?YYpBVI7jxDs*euqFa
zUs^XT#O(7-M`=tTiArs8Mq!InhB*)W8ao~bf+NN<q~4uoog=d-zkVX>fz_YK<g?6q
zJ$*Wc`OuRQ8No6Qv5li_nwm`@Wd#p1jXiGZ?Ggv4X+;2GnVeK_6<FtgAzNvX%wM`k
zOY@-N8qq8uu!)mr$F<B#0+YHw(MMqb$xO=EOUA4ude6>oIy8=6u~h^S#7mmqx-IXT
zZ(U8lU{9xePrVcmeH?Yno0fn9wQ>FZtnPv~+0v8EIlg?6=+3T0@SX8`qQkCLKyZ`K
zP|f>GtV+X$XDdSVS_^`%$&=)dA!|92&AP)tgV4SIapL~hT@nihT06^#^y>{H@HyTG
z;e(G#$9x<W^5`Y2zN@jf6(4FJ^@BF2iNk(1ApR=Q{+{*O^Wa&FqXw%(Ae@|wpj*-7
zFgjDHltAor5COEclJiVbxA4jZk<*udk?V?xY;{Q^q{DbHbNvA_Z8>+^gYxQK^SAo0
z!>=@}-^~OLA3@dS9H6U+tXm^dC_eNi(2Av<)4RsE@k2Tcf()}Sug^?{s8q)&>3_N1
z`d5(<`S`ut)v7l%w#CD>AToFsvo~9qO!GEde|si<O~??J*tDS1QYMQ#c4?<`<n;vt
zb`<nAe6zb8Ubb!(k5ZM5lLI=5S&FK46z`q4gYzA_#@bYBOTTp`Jy+4}w=aMEMQ!2j
zXIPY-&-a5N?}TG)+V-ivu{h`SSA~jxO?r@*ef&RMMWv;IhMcGHzl*b^dPGCmu5T%n
zs#VwcstruusJ?I7*qoJx_vYAMdd_8}PEMjUInOp6KsUp2Du_2YbnDATwB@$X_VqJf
zzZ-^LW^(`1k>=$*D&fr}QKcwu>JmNoD{<;30Trb1Ev6zu-6U_#par)ab;%8d5596m
zkxb~V;Tsb+ifo`wov6BbAnh7D7q2I|hHDu@2!_`9!`Na<WM{Hi47%E<c3oXt>5daD
z$TKxgvxEC6Y=rR^4iJ`aqa5vW>p5r$klnd&{qKeAF3?)%W6yF4>p1^>p_5}|pDm7w
zSc-kX{#dR}<E8SrfJgz*V~T|ZUVp<>Y+jg8pIYyHoC<T4cV5A=Ffd11D7$r9S~-Q%
zxBfOwScyN=q6X;-&Z(<~L!9T&dpeyflOWagpD3y8KGkBSjYRszHq=jN;U08C3=qsz
zA_KhVieelBd^F3~ZPOUhnrtptoVanfd=7uA36PkfyFhM!z}KIa7GaPZI`~&bzm#ni
zJD=EAW?zo}j^x6Ihzt*B#zFjzPDGKH{Baz^u4_Zn@p$5nq8w5Drepu$<+aD^t68g7
zLW-ZwdYF<M3%i3C1AeCV(V4^TBtDH&>u|ESm2L^WQl?totw{`+`d1m2;VKf`HeVKN
z$kvNFE><AquglJabj#?t`ei7*zGncRTy;2f`{Zvp6#~;x;@y02u5@6DDRernusP$B
zhSC~kx+Fr@?s)sH;(@t1YK0QXDA7$bR7ZWU?ReZ|jppej)r?7}u^-Rjq1dz+hb=Tu
zlc2pZfs+F61`r5O97QdK<48k~r+KpbYQE?B(1$MmH|GxP0~!I6jIT@C{i}{=*jbi#
zxV-m9^FAu)YlGUVxJt8bffffy4rPv^>`Gu}P=fZfI4g5a65jeV&IG1zjnUf|N4+3T
z(f`&ngF6(veC>f5J#!?!Y2V}<ihT|9x_z*aHOwN<LQ&C$pyjT0+woZn{#>9;*Z(EY
ztU5+)*+@?J8)S9b(1)LWyh3~Lx~{F!6HK6Nq`XUA{WtcIktmmE$y0omX%1_}h30xM
zGG36Iv<3Rro=AkeCpjK<!CNQP*~B(};k%mXfmKh*yUIR1lmH?>{IQje60pte=!$Rt
zx}Mmnux$o)3$JNZvRlt0RIwwd@6~kiBDq*o5ArNOOnfY=tku0mwR%Zyvf3fny;_kY
z0ai9$S65Ej5F1nyy&T?d*aWF<v!5#KMJTP68gv+QeNhSYPu{v%%f)iX5G0~Tp${6L
z46Hd!O13N~NRM%Z2P|1~=;nommS{9~p{Vf%w8K?noFC<}#6~Fw<I_ziPNlURBSHcE
zNX_jD_b^oAm-jZrNW`0`b?<yOTnYXQn^txPseK8C*oTKTLal@mW-Pu;f{6xuso@VQ
zC<|4TkQW5QFt7tgRJytjx=-h4QFP1#j1Usg&y1(?tkKigToa8xD}FYJujhuUPrN2k
z5!1k!(0al3l^1&Kq}9`^VG?7I>xfzT9)8xpY*|*TT-E0G_+(>M^w?~{^|L>lPu%DV
zsx)xaGS!l?aFwFJWQ9lt4!i7q?baK*r8TSg`NSb0W#jA@v0wwLl%89-9~RH3LblnH
z&%N8OUOKRJMFg-Ls6*PnB|SmDc0ha|6VWUxh2Fmk@oIF<<v6+$xx&DeL`uL-wZXGO
zO;e4LQNgkF<w;*xuQ1o}g1U=x?hY44gmZq#<gY-&Z0RoisEJ^Csm8(|R4c_jZul!E
z7i}?>3{;{>90Rh@TY=WO?QNyC0yH|Q!1FBoMgM_q{_u2}Wlz#8#n~gJ$0YJI-Xf%$
zuIi*6>`3EGCZ!d)H-b&)<;1+<Ey_l9;_#3QI!;bpvt{z*i9}oahtQm_z+J;7$DTJ0
zV3xql&8S>Oi1AFz2*IRQeg8R($@It^pFv6a_ACMmW7xvU01YoKQ7bGg46oVzcbBw4
ztOY7U=a25*ln9?CJV=G$I`>zjt$0DKnCG4^kfvO}&pkt<E!W%X^!VJJ_&cA?)-K|W
zYtOaefKyjjlj-;uMotckMEBLiUQV?N3pWR!TaHtHvpx|*gbj7(U1n`}qvcVDpjvzR
z%{d~!`foqT;J*j`x9^3@mp(7~%vr_^GG9#VOEy&DWc&On`>?K7Q^`a}P-S1?^woXg
zE5@?3zo*AQ#rR6)L5hieuZm8k14oe7i&??RCqWsrkuG|ZCI)dBbet(C9Rk@)XKlN#
zip!hY3n9pe^gZqC3IbAklg52;1P+Isi$*rdZv8Zb`CT>GXe6D*l)F|O`_vsv8jpj-
zjs4xoaehzzaRq)qN;udLB459r;_AYDFtx?V)6ymSY<r|}!nWz;>~}{fb0$k_qgC>+
zE+q0oS@G>YXIS(`&WI?p)3E5c)5ONw`|7I6%`&;v(j(`qXnsi(pxd|MqGo^_VN`fY
zwT+q1x}5f8lHR=QZ~}@qnx*uiK#x)WnP-zs*Pl}AIO)f6aEm*fWf9+tlViIA_0&^o
znH)<f4<DcbO&Z^Y|7)#{W6flwIVOnCu|Pw_bXPtm#f%K~_ifhhQad{I_cJZ5gVmxq
zdL5<9uUV_eLq4YVCOQvjCX(39BDJiVccz;a-h4Hra<~}7QK>Q1cvvsp7XCvHAMcEf
zI1;mxn@gaU!)_e^fzp~iaXZban;#nikG_*aL*8#iwCuw<;A!`r!~$<WMb0jJR+(!d
zF}-Bu;^gBqREPE)5(PGjteOv1-Ke=g{o>QnUUKzESh&k`;iC{hG$t9lU6)NrDjgor
zu*{a*-CDH~9t*JEy&w|C%8a)5bZwmr(Z(W--h0$mg2siL5tzAF%*Xd^d8hdq83Hy!
z#T444=Ee=y8}T&+t+(7o5Az5{P@t|Uj*oa-YIlfduA>lgAhudw+>m{jT`h2^Me=}N
z9X2oE4^#lEVR?*Wl^<lP>a)FFB<)mj$or>jvLW$;^gLF+lgCd~qRmuud$Q9bWZ<>U
zVlGt@CYOjUA7KA1(>1UEeav5Z)=B7gH(F><*s5`L=pk$KZJu)??!sft<b2?xy-`)=
z5&dPo@sX}u);p@QrO(SrP{qL<QOk!}pU}^F@XBwhWK1@78?*gvp&Te*IQ%(l5Phv$
zhzI9SR%)38|Fi<BUB_AdXkxDfe(rRzgJ@5a#?Q=FMW}6;X99_7??VsF9oiBmp2bBO
zhyz4$=`)SBPUE~#Lll<w%EY7ieYnS2c=hHCn0cDa;a@oaKgP~7D9)~1)3|GJhmb&m
zJB?e=V8LC2bnwPqgCuxD@Ww(2?(P;G8VJEXK;!N{5AS=vIW;wNs^%ZRx~OOGz4lty
zeXq5V)JJ24Tqn8D{^;<IjPe6Ho;<%8GZZWI8I&*<BvUBl7%ob;kZ`TuPX%IRbC&iD
zw;&3t7V4a(BsQI`^cKVl-4iG$(Sh_zGqSD#`2lyvO7yPiwC}vVGx@D+O-MD`$S%-c
zaXHfFqz0)AB*QwUZ{PTY9<G!~nb>vhG2I3rJ^Ik6(mp3;PJ8C5o))l?S*`?D!*0hZ
z4h1ZcZ%t0#TgSN{T_UCTVm_BlE56x~;oER-%xtu}NJrgIjDZ%nLGe?TP6~5_@{%mi
zB|TjeeQdJX9b$$BpIToM*S%$7onb^6(?zMHOR;wdZ|3N-Zyh}A07h8W=<F5pEoM~o
z=v@O`T-vejgX1^?3?$*EOsybl=@P;HXN;FGLa2`%E5-Q+rz?#Gl{j+C?jbIZT5vjw
zGt*a(F>6O;k!JiZVDPY&TbE!lBslcbjW0W(46@|)k$gkSK>`m?HvmHnh>}eBS&b&b
zZRVz-D|=qHlvv(Sft(E8j#4|SMfs8nLp<05s02La+>4GkoS;xKa?Sy)T#r~%t^tR!
zOQ_$br>NST>F?h4lke$nH_X+mY0Z+)D5nLwg)$?T891NIZ0pB(j(pEju1uc8wM}b@
z!AZQd$9e1U9~3YU!fRWByjlo@s8Id1ImbkS9!vED7LsUEYR>M*>BoxKB3cQJ)YF{H
zFXmVGSU+-Ip{$SFBSUm^l;F!^pCQstZ$PhaUmKUz@Y(&t0Gu>1X4F;S*$aTV`iJzX
zFs@yf!@P>p_mpUfm?oMaU@Wb1v*kyl;P2d1`+JzM5BFfJ&7Y-IiuH(OKKq$=`E@JA
z|4=mdY;Z9WEwKI*^`TTxG&fv-kTN%<x?d%!o*L}IcZrZDhU9f2-!Pg)1Ny8Ed(Km(
zPmvm1lwJ?j)uBw3_kt(42UqVDz-(l=FxyPfK)&PHpyGE;dKcb9D{OB?GoA53^IRpd
zd*T6d2hike;x>!gXALG$(R!Ab5qh;_WUfCd2@&Lcqevm7`=>uHlLLVuzF{gN#w&^y
ziUmEVnR;vOCxD)lMn-=1OYU+q#3_lg|4sZ~6mZ|yII4V#sQ8(uPda<?S4Y~DqxjS(
z+?Gd|QKk?X70i|`eI=(L8`K^0Y8cbn*o^dKR+@vlz4X1`hDx#pTaQZKii~dp&VtR{
z&toBc2G#Fe1BPmTZ2o1tuQzjeD#j5|W%cwzS3XVC@!en*rlZ0E?l7&t$mBk(_f$t4
z1LeBaiYYd+|7?qO`T+Yt#PdI2A%_|HeQ8MS?1W9K#$=;ch+`%H@P^kb?-5NCba0hB
zzX41n`9Rm?#eTL}=PW=j!g}-#OlPPe2cf|bcJ;(mhJpt^Y*<;lQ-vZ?hsOk6Ij@B%
zI&_sE-YU|-JODN)GZwjjw|L3j836m(+>}XE`jmrCrN)%qKINd-T2DFXATG34w(7CG
ztiU{bwwWq64Qh={^)1PFlUec6C+`Fmb|C3klM|zq4z-?CB1s%)9~{8n%8KQMi`W@4
zsLIi|Brs$cf0>Sst+iZNzO`t^`WpFoIqXs+t6JG_qTi`+c|KhnVT!nT7iMbFDs6^^
z(wtK#1S@MP#6JmiNVqRZv+L%k_^IEfp|5#mIp{k9Lj>Q;8AbkmO%yg(3Xs^JwjAAj
z5z7*9;L0Wh8W1@zO=54kjGP_pe0CzH$@cwm!d^t>lyz(<zx_MYlR;-W1b%=hRv%7a
z!cxc_*j$XB&2g)J$DP=7cGq3|0aS_3P&S?SQLQtB17P)EVJmDl%jWu#(C9_PtE~kK
z3lb~%pO(Yfap_GOR%%m^k(X#Km4pgBI2jzG*#F=REws`PCXd#8Y!jTtbhi_1pf%s%
zFnWyMC!kegwvP+t?m^U~S-9zkXiD}~Z_jPV?O?$|G!;kjw1Dd>nB`}myfpB~7j?0$
zX)Q>S+Nv~Uu4vbGsH>G37w2>eqvXKOcqvq+N}+>Tt7W~i+*qRr)NvD-nEG*VKT{hQ
zgNkoI==yr|3+Bl)YB1-03X3jML+fmA<bbmdYFQ08f~D2UGm8z?rXC#zzOyt6^k2dY
zdS6Z-P$R6^urT?4@y~eB3~{wHT1IXBTq!w%iPK<N4JKl}z4~6?Y~8S~A6ml`$}-cd
z@{;vCy#ASWK-uV^uRR;Ws(sV-P+ZzSSN==T8VFM%ywmCpcwC+{S`c$N)pLvrU<1t9
zKe_zkW7)!O4rVKtJ5n*7lyNCQ77ANH3YvoJSduv}xcAEoJQy!h#-|<d5fxnWQ*lM`
zAEYu|^30wFT%RjBe9~CFk#9v6_GtYD5g;nBo`?$XuC3jCZC)XpEtRpCAp(bkwhF2H
zY|{n!2d+X$H=DKx!(C#`N^~Ndg!eV}nsPCK_x{+W1;r2k7fk7#{i$u}qP6wIHQwT~
z-Zmddht&+N64cK&&qd;$T=QoBBKT!6<SeV)Ewy+SO_sU93{9qCD>}hu5}TdI#-p)1
zPX+Pru-vr&6CUtm0kJM5^iaQNBd(yyiC4a(i<H}fv)6o`7WPQ{#8F*aK+M@AwQs*v
zt8O$u;vY&0wOZM}r3$?5P~bV~wEN+5KFqvX`P;+(PPXmt$dHKE;&g6(b7`CB3MJjp
z@j?S}thZHQ){Pe_-2~3;B0z=oYk9V!<jyF8gTV2{{o;=e4_vCL^`U|Itm6%L7yv_5
zFep2Nc>6LOA5Jt>O*ucHhAgU!(16d?Q4>hGDQxwTDxxbkJRch<a)QwPLaGxnHjO^5
z$#s4?=a!^8wRJyO1oG=Wy&Y$70C%f=vI1ah^~<Hm(QQ6ZApmP$o+M^q!!GuP>4{9T
zSa2rK`kZ?Qo7p;eZpsBMWn7s-Z&j7J?{O(7O?7f-It$Mx;B!W-K-dHD?qI|tHM8-J
z+qc-(Ijd#tMc3W<(`Y-UZh^iB@b}<g0Pu8sn!ZeMvhN;b2bgMQJXW`EDss#&N%~|T
z9OO#s1GzXH_5DKx9oO!=E{$G)6yTf0gpoTAs+*cFPd0hn<Tf2zgxo{XJ>3Be`6Mis
z@=_UJ<^i?d>65PducQjtnJ)01mxu-5lbK90e81EkBdcHK?hL7ZeW;eKq|Hk1`El{Z
zsYwB|ySo4wwNVcZ3?9u)<i@~lM)=%E@zXlp@d4{bGkbdIZLpc(A61B)0`KnW_1CLY
z+J<2z`r+~r%x=r7K+{-W?2r`oEQG$01a)E9mazZ%xM$vjxBJG9VL<Ug|AK{4HF=R@
z^6u@ZKT9&+HV3;%2H4WVgmWpbePtMK+6>^JYSz~xo%)Kz5c6kuJMKkkkO7mA&5Eq+
z^iOfi!F#{*(nX7ULl1LM?^V#saJbddMgONgBs90vQq#8CAYNFXc<7(G^K_vxj3y7K
zc}CB>eWujQjrUrdi;20?wm?~CQPV@PC1;sK$qc$hl%-1YYZmQ0gdQ3ji=5uP*nIG*
z$U0pqD+9|HKLJ;F>)hS8C*Ugn-@x_HN_0oWB&$BnmxF4+b7Jk%@Jk6B6*S+zn+*^Y
zr3>pRAnvBjZ7XD#4coXu)U3y&fbZE7$Ruald8Zgi;m9&sxee#k5%)i690k5+=E|@%
z4+|GSTk?Qv!xa@Q8XF%?A5mr&!h74SPKs_8z@yV+iN`CP*`Ug;FJVp@*yo4)c-t58
zv&-@lk<1*Uof4fGF%A_R6>k>Uv%oLSr6ZQJBSeyraO8jb4;lI**)H^KJKdeW7=%F6
zT%usZKw+9YRL1x02ptLLwbW7=rOXpZ%24oGedxSjt&V!Fst1uR^m}#bb6OvQ-^UNL
zTj9B`DZ4(&Z5cm=BFT?#zX*+bv2701^-=UeZnVM&65yrY!mwfPF|R@N3B(PLbh?a{
zy=KhQXw8ErUo@Y6H6WBsefu9n$$xO;t4@ln7GP{E^Nws339nie#WT%jsga<AFF9-r
z_S|I3_tx%&)*9oXH0QK)i)Y<2&u-y#cf%wT2<cq#vTbK~vUL>JjLq7HW3zLfa3Wfv
zl;{yX{shrkxR-fZ397=*a18z_N+p(YItRN7KfgfJPTh)+8<y6`D2AMO4henCZC*wm
z($@KF(S0u6?(6DUEM?mK{*z8gE3QyiCeCmE3SJ}eG6Kzt)>cg;#r3Jnx+7RmO4UUz
zK!y$j*}I}aPM8HCGBQ(~^iGx8oHY4_xKwYm6NsZ_<MBp#Twm3AZI`Ij@U|;f+LU}h
zxx1w{h^lA+cZt*pd>HEw0A~_SsaRMXguggq(d|^Y=^8tEZq6xy1j9qd-|UissLdZx
zLVie{`Y>g!%&P_1i|2caIY)UzI<Zm{mhVeJ2&dUJVlHgH#;a*>2qtPxnYYJsOB+eL
z_{zMIo%WO0rUJT}xNf7a5p^fmy;5X`!e3TR_~_l<m@E$=u-C<4SV1v^x|8(ZRfQ>9
zj&`BDP0X%8`Yom>Ofzju&|zMBbJ{EStD(;TeD$4DhjXuOX4dm|@^Xhmc6CTE*AT@T
zydN)Azlt!0-)<Vk?nI~BnuqNxeKgnC7|E{;6>b@XvD^_0_5I~_Hj`8K4&Lq5`0c(R
zG9x=z>`6<<CO0j@RuX))7XI=T9CU*ShEeAbUU9d=clBwnp(~}^bE(T}UU2XKF^_|P
zANZXkrxmOo!mG3SK9Qq|>;sR*hCul6^-b==e9ZivAa2?;Dsi#RS2AW~kn_h*EE^AG
zNyr<$H~FZDr0MOeUxw|vRfziBNpqxe@)V_Lq2mx{$if1YfXUd)8~xNR@Z~|2x1E=%
z2k#(3K2s?rxmVu<IjxuTBWyE>7fDzcbF1RRLY26HT(e3n71M@wo}g1qI)&p8wpTSk
z*{thk;dbDUC@sLrmH*qk2P)_01esXp1XY_w0kC!n1KFE%O02U@2oL_+Hf}*txvAB9
z+-HrvvN5#H>()-l`|poNPo!cx=knSeZNUS~d5POq-yqD}Y5I0`Zu>2vxK&#L(GshC
zIL70SHAilGJEXAm9<CewVMe{WYrP!9{A@A>Jk})M7)vmh*ypphzlzpPs(I`qOU(8v
zgaw)z6NaW2JW+iVAr|WmBd@YyH9-^RbPL>=id>b7ryyG}RK8K8*F*L0-n$nXRZ&#p
zmwk7Y(J8l*6nvXI6cdC0!mE5(9-w5`L6PkhheDd9gW2;p=sr=K)N0@>J<vInp)3=9
zdyWD+JbomEuZk!pLj6=lM-<J03!d&QTiNcJtPtMAzrw7#RW}<yvef#Ch7yo7KTjn_
zz0Ie5x!#H^k1Q@YL&R-yrtu#yKwERs&l1VR;fQ2v%(~(V=yJW|YxB?<L6M2OgXKbS
zHP$hYubE1t^ORI-LW=%NE-1U@3Jz6doC!JDW$#JXq~Jj$mLij}*a|wzuk51i_6T3#
z?g(Aysvn+L^Mm3V*>6+=dY!QcDu4I`f!sAVOk9qZ&G8DOtD&<JRw!T$6Lbu=$r}F3
zDtAHJ#Vyb!621`aWme!U!L`0oW#{wBa{n-8Gn}fc*zq788JhUo)L_Zqrv!7ik^?;?
z!?|3w(>*E;HwwIFeRkg7mfVf{7iVZL8q4vPTW(j0eu2fbuq0R$<|pTEqmtpx-aKAY
z7Q;Iu`CCj1eaKC+DTyYXVhbSmv@|3*yRUUCOBw~G1*rty1U24nwz?1o<)VUN3$jzI
ze}@sjO@mHPh4}$X9*-g8;@yV4>=QJ3(;c3MC(uP^3P3Jg<-fNfA4;qT+%xcH{KVpq
z`0+AD4iXg`@v80GVZGke+XCkwDVQXvWSl=blwhQJh>nPE9ZkCJ<EU<YxPqeR2W8Uy
zN7Uv0^R%?J-_7^bI#WHJAE$F8`B>;UlLH&oXzevkbeJ;+$)Kx^_;sc9=i)FN{a5{R
z1vMutudmm>ccXgix0j!9Z%qDpQRIP_nMqN$s^+>gP?W`si0+i>uVB4@LDzQeO5@ZA
zl)Ho=?%@b?KaaI8f*vu9@u3mp4QirAV<+*4P9e-A@beZcW~h}L%P}R2Ee>LBj9JvG
zB#=m_M};4-baCtC6;Jaq$a7GG!vxe8F{&^|x2oP_5E$#Z^#jyp3BUUzx#}Q<eBO&=
z(YWq+T6N$q5+p=~8MIcpP=VL%RN)-q9~x<MFMlSmstpL92^i<z*j!Z((@8(Cf*ckU
z91v5gwfQkeJ?PzDIp3NjWJQ=ot48e}(0(`o{}x$JS?0$1e5r;@L^;XB^*e+YTq}_N
zIAUlucGnizn$dTOv`nO93KWbQ#`~<qSX<%3#4PZ=k9K&)cCmg4*K^<bN|_ezjCMad
zxV8Ep-GRNsmv(@|_4@`mTM}aP%|PmMma+O|n`|^ss_G?=S=r-7NL|+i=V5kCFNkfQ
zEYoSO_mm~(8BacQp<J~#JpeI`mMal9B|rE(Tg`g&$;|1pbAVS+ZXgrf-VVk^piT<J
zQF~C@l1ZS}%_Qr~I3x9UMM-QdRI$*c<Ur-b!^@vT+PF$X1*BGC90ggLyq0@#i<6;B
z-V>cs7L6P)cx5Q01x}Q~s?=KL?80>)-BNi_pxo&tnqL1PzUX80uvGY+uZLf$hvdsC
zOYcyb1;xoqSNG3HxsA2;>nN7Di!MEu)8o9qa}1oX-`{?A?U<d`{Sz<K;DNlQS@`B1
zLMPw#eTZ@c+|KV`tzu+(=B4vKAXP|>{{0%fMHFix1a+9#;ITC89&XV;bP{DMm+8yr
zD0xBhH)_xy`|I#wR_`daNSuFxgSYvcz8BHQ3=7>B4ZT4(dBglV-yb8$=#nh*xnJw=
z|4n=8y5%i#^8&P{SCq)Y)DW#KW%}6ff(D;7&8C}B53X*+pwkCvQt&HSFW1`;zOL$)
zxzD*aCkMP1F>iDH>!)TTcy&jZ)-Rrea*Kmj*+aGJ^qIf4uS9PWZV|2%yr-G-oRNH`
z<8490yVSR+w#d{|^>s+3uhnr@E|u2?QTlc9^G#hKTM55*8`NQQ#RG{X`AD~U?Xn^h
z6;BmG*Dj(~XgdAOGO1eLfa=JRY>50$Uoe9h*L^n&xqp+cCg7Z#r@#*%oT~>AH-Y<S
zwd_B|N!*ZrXUv%Wa65Cymv|1`cyNJj%wV*}*B*sG4xJP<Km*PmiqSm#yx_xom*4t9
zA3A4C<8YFzHh5){JXTUTCGK>(aA7HoFgVR$clXoeX=b&jI?PRmhPDUuqA|&C$QV(F
zSIiZzoT&;i7WwGQyuB$3x<9i&eBx=I;s2YbS?~e%+4NM>4#MlteWU#eL9Ya}bajMv
zI5~Jd@)V<#dh)mtCU{vj*YmscE%&S@ag-Uiwl%6l5#=+O4!DB`g<(_1rhOe5^@NT>
z_#7M;Lv3h?NrVCCx3=x}6kq6lRiWJK2UVe8e`g!D;H0&gX6|VXoZYgOU9SGjt+G^b
zs4lerVD3b%i&wK*WOHkUyAoIJy<5Ka>y3AwFG-_l&bN}D8O*J+5>tb=kMp*FsI^uo
z)n#B2*mk5h@W)*wm_OPKd>b_HuW1dM1+~Z($dg}zC5sBlo6C<E7pthVb!>!uZc193
zS5s<G-mHP9%&A`oeG4_PH{BOC=PP@XOFFCse|qvGZ=(5CFSn?+zD0(%RtRgGY#Cry
z6n_$}eXdW5jP_fl?!&TTw|oJGv87wWtPZ|^+f1HIU1l}n9R_TMTxq^ZsfABUB3X5o
zSr>6-<KN90#0!rwpE03nh)Efnoo%~=ge(5EFHYYO7H|1%;(eB3f7N9{Aq`qRFZ>$G
zyp95PGbHp9>Ld*wRnpQpLx&P#aJN&)7~4LlBTIK5I;ZyHb{V(K=IZ!O-Po1#7}bLj
zzTlkacd=vn`lAtl_W>Q$nEe|nAx5t#4cN`(m|qP{wy3_{kJ}r9lo_MS7aqW4O6eL=
z>8&&NWEfYx^Nz!eD%C0t;kryME!tFz<qdCdyO{CSAR~aKvPMi>ISLBH$ZrM-^q4#5
zKmC|;dnAN%R|*n^*YAekfrl4AHW~^&$|KWa_Qp~xb2F3!La|#H_hme8+~kO9c5w(T
zo#WGPbPTI~RDYFi2Zpfh{0|!t5CYa9Ebdt?y0(cNOcF+#hSo@3qLD_X#~8TK-|bD#
zsrO)&B1EB4goT!eiFdE5Oy5l5zK!41Wug*H1YiKtizf=mRC5%8%#2&pgv(K#9&fR`
zdO@<0ZmQb<Ioi}p>(qtk$2%Kj$X0e$P8wr+Vi;q|*I<u?06BN@pi;&ABB}_g&a`T=
z*~YKa5479rQo_2N4D1qb`64E8<rP2#M-F1W4@^eREp*a9Wk1?oqkvUk^%#9<jsr{P
zx@E1|1w8;!nw`wQP5uIjpe@iw8=*_%cfcL(nh9fcI{xP3Q7Za|+`?vc!|2=WM<RrX
zr49;yw{Tky8iRRIFPdKl=F9`8<W#Ew!VmdR#r5hVj~(Zie|LztQH%>VjhC4ROjk`#
z^V=oGi*f)p-gq5{v&Mqn?Y*=wlP$RGqJ@$*ikpm-)s^QguELszMPlwRLY)@^Hge6*
zg7nuF(lsG_z%xTDpW-RXj|(YEC_GXSq`EbOal;RM4rkS(b4%W4;r(2mg`_0Du@ca!
ze(^0Q%5kjVA;{-_k*bRyPT%^Whr^=9N+u0<c`Yym1-WY{`-Ul)*w_$;@~EnUfwg?o
zC(ji62ab1&0w(=>EQ(zRcpyYacNR9AGMUileO^qNh<mDdI3oz3h(9jt{?2n^wsDry
z6pHs!x8`QXGqMWJbsKL*_cW71=*LzUiaoL?Jcyb+1>h)QXiEo0Uk60TH!JfHH@xGS
zOWnuY3XZbPYhI$rK;8=x&HZW`YOlIh<WIihF4gWmGqY|$D9rx{Nx8`wKN(C;a)Zh;
zuHWs~yPmnyLIZs2Jb`<9TcjHoh=Z=(Y*sPc2A#9Ypsp0+cJvkO)fDhN)Nd~I?6`Kf
zUF;S1r4>`MX^b?&&_!0r>-A~Z4vJG}Tss4~od7q-wkmwYuN<ffE<dr4QNVE~>QV=e
z+7HmmXeqr-Q_*Kvw>xnw(OmI>u1(7Q0X2CcaOy{YF(mii(d4VMe`RT7SuA4s5`a_@
z1@5scKJRB_YOCmNhC1~5$HpBMzn13lR<lO|huXAO1uS`67<*f>8(D6-5-gZdW5?Ya
z_C*SH%>HVdMCGBp+Cl+~X0>11k4P+LFd9uzfvChT<CPy@j)WEkNE9StidbQkN!+8J
zyu(EWRYUp>2@%3!cZY{`O8aR1uIn#P`Q>Qa305O;K3f)lQO7lpx7r|}k5u{8PB}@D
zj8t%>r^8|K>J?(r#NxMfv|A6FAXnQ=M6){XRhRP4RBdv7p#(?Pi_zW(%=oPXB@~of
zj|J+r;i~dVBql`s3@e;GOVD>zFpfk<@S4MhTBZ!aq@99ic}})pMjxF}{8wYk)s6!h
zo`c!}NX>aLRe7f3!sA5(UHb9j`7d^+g<hT8C;Vr}rHfZCBS=rxoesPL@|E$|mjj-V
zx3@m{pc9OHuftqgn%5$Nhdi=GC90ya1ktF-Y|8GWNGr8hCgxvIb$(bJe{x$lY(D4g
zZdFYV3l8<&tabXjUsNqI5A_1%GtOqD!jWY;GL!1M7?pMaJnv6rm&-~cE~?8*ySG|r
zV{Oe*6ucLTn;!Ls+-%P-0X5XmLma`UHZt7mZ~VO<P{9)j8nlK!Z|1s$dZ8vbZ+}!r
z#V|xf{D!5BDTP#vz!`=K6KRT**sMD!Tt6p9+zwAT%jJ`->P{>AZjgbdd?sJNW=v0#
zeW>PJi~sBGv_ff2P^SP+Wd=2)kL?jc)#7V$Vrv_o%UW7i^YGBbJ8?@BJ5bFPM7R7I
z@eM=)PjahL-2f><D@0$VxhlL747bVkjm4`u4l-Zrm_-M{%RxyFh)|p1Ljl^gOa3-P
zf&<(06(JNb%$dNi6$c`>nam6-MjY$AbF_bw#zLD%h@cMSI_PEfye`b}kWxSxLkks)
zFmi(>89E~{gM6{P*YCDD<Kt{!b)>dWTPT1XOZ6(>e}#U-BG6LS`|>sDllqn+0(DbV
z3d$n@dL^?aG#U+Lh1^+qvqo+<(k&l!->$>@oRWAkWGs7>O=H4P$$R_8x{yCdD1Wd9
zI;wDj0B;8G&ljDQ`Z+bf$VO9nBg!fOyKJsVjCb!0M*rW)KJU@;k9K)XbYVmLp@nVw
zXxcKab^@cR?RdhYZ7IIPrnnk-`IJYp@m6lfABLTKA2D#FR;ovYgaXb&zZI)bt%KK~
z2k0{v;via0B$#NRAr{;R&t*-hTlQXTdfIt6$?;9YmWJdX!dcgx^ma4t$y1rqAjblk
z(t-;zJk+OJOdH_9`B|C!PwkyIFvv%s4p`uNt8WqQ0qLFpamqqt`N^@M$tr5#-hrhb
zMe@0DH3lq2FAJ~4jw)GhsZzt`@b1NST#ISuO7y@1#f73LrPRXV_~`4iw-~bOy${p0
z;xR|RBANQ8`w#Kb;zuPIS`r?uJ44D5jxk1-WZU?VT7vr%dUsXi@c8HL74qvJ9BoXd
zSV6nN75y(|>@bD;i^-WYJoc7>S54t8jFuMSRn5S}*aY@ItGy%(@xIMJAJ@*vpo_t6
z5zp-tg+_q<nkCop8?Sj83iwZ(a~Oys_((O}%|q6NcZt{iy@EZ<!!qyCxOMHd*h#j&
z7(POPaIvj$=d4-7(!X0lD!4iU0*djq(kPSJohz>{Txmd^950hQd1T>d?X;?)%8BEx
z#zq6QVQD_5@63LwH=WUY)D}1imR3stmc^!Ft;d-xI<K>Gck<dTJfhHA<c053oH&SM
zDqL|Q<trdai-C0j%_ZKkrOVNh6#SE;rFn~$jMU_TmR|yBjuUF5I>6CHy!@v#)0GU+
zXv%)t^b1~G;SX3d-Au8+e3acX5qi-DHpEvKK>2(t(lNP?vu61PfvF!{K$l^J!hKKp
zOp$DkOhNn+MOs9k<b<iAoLu_~o5kQ^xymx(R{%#O8W*=1(POz}*gz731{-xpTd(EI
zU*J#7!DIfVL+M=7v_mZMDB#Qy-9QP%VM0mVG>;DGiHl&k?ekEV9dU|auHRsFZKB3Y
z+*g)=@6`~yMY|;(oDqCSv|lN6`N0v1o^9Ltsfu|m&0`$8#jgFA@P-mUe1Bmb$HLU7
zPtvgF^=QRau$}-`kySA{{6}*;0yMXPEk4pP6iR7V0Uhj(LyTEyjQ4O!W-H>PwT!3q
zfOkQr?P(Kt-)!0o;((*vr*r}p6?R8Mlwp{a`z(()x=N_)%!d&r-jTblvyGNlQ%`ZC
zu85yC9k6J9BxPscIC^)zx52p{9#O`B(EPz3hm1XQV+OnD-r@QftS?}fHy{gMV%1j>
z49rp2y~W5wYRbXOOPkYeARepb1zDZ{=l1>M{C~zy6mZ)*c*dG?Weomp*iTvDk)oUS
z`c{mY$7OxuWD>udJ~ci|n?0kHq9b+xW8b$o3Zz7v)yLqF*SAi>t#xaDw@vkS8&jr^
z7IxuAmaO>jlHc2+z=`U?XanXg3xsD2Q}ykPULkp>6G{8*0A{&B$6(v+9w`#B;%Fa}
zp+d0Z+EKkk;6YwRFt3O;eEhlSdkh9vkdJ`y4Boy9k9iVKYIJWemqK_sDI(icQauvR
zJF6Zv9bR_<0PU1!(7vkxfI*Yl`Bd?S8X}--<fn_h1w_2<e6TNIv`=m0UC0J{k_Cq~
zrpPTYq28)dM9>XXTO+<aZ@ftWkDRiP5}hIBN}8jBg~f^_3y8-?ObfpK36RcviZyvZ
zE{IhBi8aRm#+s-8LgWK&;D@8&97&xN#hOczP(#kgl)uRn!?F`;yXub%Z6SUUN52=0
zY{p&6b6W5LHzROJSfa7E;N@H}CHpu<!9SH^kB*JLKv|gn`v3X6sZl=_BMdMA&xH;K
zlT-wO4yxNJnT~huso$$?svW;iKl6zjo(>&z<Ntl=@}Fd|jZO7bH8$k!KLJ8%yy)p!
z`SN=eMXz3Tgdmr=7?1;9pj{wV;h*X(1cf7XuB21!RN%;v8Kd}|7T~pLZM|vsQPK2H
z+m2ghdB>KJOu9vNbP-pd0I635g|*f62c`67Ia#M6>OvP~IYO}aPOwF<zNfw_J4Y0-
za@weEq~NHyC5u6LRc)D(k?`azhC>I?j%R(by1T1}&ZLSd^G3dDJ9@nP3L{_`5`l4o
zi)^Z1=4B<~3(_P6mMdb`6j#uW;x2}-V!Ch^nW{>WXxg4?)DL!jN3#zNuzN>xN~=Z1
zu6(4{sjs(8I_J%xo}DciXoPLDZz2nL_IMH_XFZUv9%)FvfWy5U@O4ZK*&D9&n{h>1
z_I9V9K+5?*{>JaQmnvbK0R;pjy(|-l7e$eS6E-fl3o>v|&ssL-IQN2-HIZ`)t$1;d
zn-bBF(meJhB}R>J-f5YMlb3u$%qXYY30nXCk@1J5RCxwr(@2G}j>M*Ug1}D^pm%#>
z;xH{;7XnW!eCEsuTC?8mtRul8#^TrWt^nRj@NA9qycUZR1@lXeLVvksZLU9kjUZ&Y
zo}ZhJ_5lIEKukz~^US+lWvZ*Xl)mG$i?XfNTj`9k;l!NQ6RzxCV&e_9h;8#YsjIx!
z`SHoG_B8am@g<Ah(qvJKbCz3rO+6RRY_wFTeoiVDjY10XtF4jcKM(vraHk5qKilxQ
z%cp6v>uUu>xR*RzP%#!?c2(_J=31;XUBt6jpX<o=Si4MF!#-_tKlQfSpvz!UO6I&6
zlxWsUItT*eR<%uJwibL{_;eT!M8CYrhBvO9hX_B;5nDtbs}})Yh1rPn*jDSUpBi;o
zLuG9h!XuoziBn?LrY1gG)W4Suk)r)U1q^WeGlBgkIK)H*d;KTY(eoQ2PRUA(Ee)Vi
z??aWbU-8pk@_yP&JC6@}+8d8aFNXj!_2V`5hrGX${oW8Z!aC7U5_=Q}ear_DM?YGn
zuDuFFnKxW3gcwU!(Sp<#9xJvg6yu!&13bIAbL@(xkB1?hL+uO9mvtPRAE}Z^O<ctS
zE(u`Aq3%z1Yio;-+!nfNexIC=8}KG0--?=z33?St&^)$IRx8B@aQ5|vP8WR+z_&m?
zotx9TcH3=)?6TNT$<@AMb(_71rZHFu(ur(>LJHicr@tgds1C$$qgO5a<CLb()Tgn{
zd1}xa`@9kF1|_IvBr=Uza3D-F8^)%pkhqKZU+7*xJ1Fm4Jv+)9y+6etQkjXLWH(e+
z>LW>HBKhm`q06~trnai4*x-3{`=+i{6Uh^e#7E#tisEFMccv5D{l@1yPAj|^0?ZG-
z$Yxu(r1JV9&~j-G5ma|-63N^$dbh&>jXL-&I7P$nSle#lI)g`b7+wATz~Vnf=J&9f
zrDZQ=6Vv3KOXo&^w<qD(d?E+_HaKMA&N#UCwv<+TgQUdYBV#3d`;!lK*6w4OBLD7=
zSBbgAdxz!*9Yt@q0YX>Pr774hWiof{{7{8s_3`HTRU>}D1KE_2>26YaZJ57kXv8a{
zx3GtsVtoq(gdHNN*jQbQ>F(5+oYne)&c=`DZJG@M65;M%u7fu1(~N=Bf};bOK<#AT
z<t{fW^lhJs>UvS?)a-rdih)MHGWVKto6lUpDNV!Qv^lu%r)BRu@mwa7<BV+-a0{Ag
zu}M+NS!P<HvlQQ=I$7wTPmCGKtNvQvnM%^$({ooq5b3$M01QzNLeXI&fUWJjz6Wqj
z;Z|^Mek;B%<uWG?IgFesT5&4@CY*fpp8j_;TzhSWWvQQ?5i~;0Lj?m({PY3_Lu>{9
zB$Tm9*P$aUl=WE`za4pY!q%lpdZ<Pw{2Zh0A}`~M-7)2D8#+TcOzs1}%`GPKcMwM%
zH-eGa(YGvb-v;wWGCY>~LozRec7pWe8s7FnujrU|r_V81In(&)s_-q}p-(8Yu79J0
z4U%7W@T+r}2&*a&e*wb*C~IYuWqZOUkceVukK$iP6LuS&Bg<M|>L4KY4@4YF*ofO7
zq$&WBSTK9-;rp8yWjwF0Z2V4}c@vHB%LilAEtD}h-VPgq#Mr^~@7*owi-Az_$vlhm
zzi9Mz1zJXw$m%@9o4G3TR_uJj!Q{h$L3ZP}a2~=^BsoOjyFP{DD<WpHF5@n_E4c>i
zd6NZ^z<K~BN}=RCpPz^0iNaazZ!p$CkGoutuY*GVgv1>1$L0hyg4Sy2Kb&tiEQmJ&
zN4%zDTm3XoQp)i`Z}BofG-ZqLSP-1L+0=8s<10yD#~%b%D^W;N<1B`|@3#;zTm(vG
zVbWPOg&oQ)2ihiY(`GKv*(D^q?|#XP@GoPZJYEUj?ZpeaEe%q4Gp{6s-s9s-pBU6P
zl)0)bgdgVN%B!y4=GXi{P_3oO+Pd`lw(M@V8RNcTsyAec`mLrA7vq%#P|%`F@`MaZ
zAUw1Jpz9iESt{zt|5pUgV?7&@a@B~GaOf`!a1{;sF2h|w6IuKl%cq}IJ0a@&<%A_j
zsEr<IB2!&nZ2>UswUUNFWuO?5h^U01KSi@4z`81|i|%#9#=HLO`VDj?8gOg`&9ou^
zne!Kwc{sHwQsa7)pMpDC6N>>4@g^zDOPl$1{8}t0x*CvY;Ka9r3(qpR$^cR}WQ^dT
zg7C$@y&ZjTHQe!)<g7*zk8rmRquQL|ZrmUdf&uKpa#IOXn!z<@^IzPp5%A!y8C1l;
zA_Gm0W)Z}iDlk4ZtX4TltZ1U!t75`pk+YxZA4iA(Gb|14E*=44!iGv;;`IE54hnOv
z+Y(6LCcy<SLdDlIJjec&<WFVsSMOylu>^{Tb<f11emKrmvFPEL+(w^P_89D!RJ-<j
zec#_&BS_a55s%V}&`=J0Ux>ysho+-!r67<i8k=@Qxi~2WYpC%BU7)UALh#kf=|+^x
z+dvxD8+Yo};_|1KX6$r4ep4*sb@*YsUUgaLoRw?1SnTN5KIGGEulq$7f+z|YhaddO
z%+ZdSMzb-P*J*dkb!&NgPCA*KE{Tt_c^>Dp9eFoDW=uofMy;dsJsnCtrLnQ^am9Ob
z+LJrPU2!_e1uQb>hAH-2`mLwVkoMw~2-mkRXeVhI1OfSOvKV(m{j387-s1M&h^niG
z$70*Vtedz{+&-~8?7pQ>M<WXwMC-dFn`<u?)gv@i_F0X9|8|a|%=eRJ`;AcCkDGB{
z2UPI5Nd34Q_-PBI16yEAqvCU1W1|h~r4D`vFaRwXA^d7R;~(;bvB3_>73z$4M}x?$
z0XzxJwt(vvTS*hTyGMs$n6O&mBA^D5tEV#7Jx55ndXW`WK-)#d4zidTtDOm&f%5Lz
z@j|V55w^A#Tm-@cOm-BYBh#XtvoA5sZ%`Qcrnn^|lH5(k8PrJ+IIv;X?~UAkI?MF^
zSRl2Y^>v6o8WO2oJ;}qT0lz9?k^x(WhsHX-gN4%E6Ec)6_<7fjD5$nl=SVW9b?GN<
zpj6^v?6|CXTPc44v#w9&u*OQ3Q%XKJ4h#Inlz@i;J6we9c?&e%*sj!~jWz0$dgV@p
zz#_@GHZN!krx|HjuH2zS3O*))S{g+vKx5lR{HlM1uABX^b2ANQ-sID0QrFKy(53Ff
z464RQbukp7NCWtqk;K-A>e#1M2JX)E+JX5wX>_X5%ABlFpHN>E@Zp(sS+(dCd$`x{
z4fyO8eHgxESFI!A#XY+E?{M>z8>fqc=VErrJh9f{e@y}0;-3;}hGu3!7uqL4oq&nq
zaEb;iX`Y42w=G#BW&#-s0_sFJ<9A7^vJAq;PA*d|Ah#n0$u;XjNOOq}tLAlpI|DNc
z4bmH*hPZ60g9Ct9;BAwQatM3n_`9$zg+@9^q}f)icf@*nhRrPOg&L$+l(UqjZ>Z~@
zS!F3>*?jHSsR%fXWX|RS1uXDuoD9@VbU%jJlm9ThEX(z#o|J)bHQ8Y}uG=<Y*#81;
zLED%hTw%BXhWsBdfM+N(DAyv!v5I{LB<wm$kj*alY3()6TnKN|=GNb;)v`E*@BUDd
zrA99WcCBhr_VW90+x@3M@jLY?)&(s%s0D-<8{RcPyE&x}@A-|{E5|&?&>_ck6F{79
z>Dyi>Ow&0livj)~6`ZaZo;xQ8M#E-NCxUCPF{MKsDFgzXA&Sv&K5e>5O2YYEU=p3P
zLxA=$4g{*eSWV!|^Z-raE~r>8r}AEBF7+%-CHI{AjYD2CWL<^lLezPx5bH(F#?lNc
zg0_7W@Eu13mR&q5cxkG1bLIVcZQa6RRSOh9XtXvVMw=;h?aRuZd5OGtsUeQyVVF^@
zQ8{lsV~qK&TWV^AkTGkR!(x>z&fd*PrRxH<#C(2GSfKPE(ylQ4FonuxtY-Pd;>2?a
zs9V#Q>v1yJ|No;k9xP><m57)`Q1R<Fm)RA*3}d6d87#TIE3wB&7)VTYM!A%PD{YZK
zaI=LmgbZ)X)+2X&o*=`Xt$?cUe}~oLWZNT3V=YUOGYT>;yb*T2KVs*5#t?&I<)&6D
z=4X%@R2{V{r<$DNUp!mEEWF4>Us5Opr9yGSUFCp;+ZsT6Pq)H9=14z$toY4RV0|gr
z(ISDUI<k;)ph=g$7M*5E|Lbdx<}Cr1l36{xN%$+CLf<hD;pIP=D~K<=>=_ORV$cgc
zi%R4sBurR92la4k)6W6@v**21x^&p$XUGN7GJ0&C6j8K(sITNdTpWbwqV>P41}xyI
zzZM@5xP@VNLaI;alat1_pX8~45Dhi|?cKw5zH#MOeZwd*&j$aC3q5A|SYBJrO3QX6
zg*=_YU%;WP9{Q3+K9+aXJ*vmVuatebyZ`U&JD3@sY}qn3S*N~@=(m2m#VYzZd~rB~
zAY*%=!E>C}wNNpj_;&lo-IC}%ywzq^jSs$ZVphMk(W6OtOu(nQYTdF21e<AJQsn|V
z$;oZA<sSgq(2){rrw3}Ar+T!IzWk{k)e`Y2%UfadfX|J176DgC^`;!eTIPOh@BLDW
zZC<3c1bhC+V2;YSh%WZd2qHAa2xj4<qGr-a%RpVbibDiDlr&*Q`f1lLX2t@%rsu^Z
zsNzm&0J<NfL_4YAj+o+Q)lhDu0W*f){@o-xp%Bok{-Qn`!rrW(lO72fnB^3-gb^@|
z{u-`(Y}_ixWU+ODvLd7!AR$y0Sx$3az{!yptsV~yWK)ty1e#_s(zmoMPVTK_?9!<P
zOP~XCz(5AJSU*eq;d&2Vm+AGNqumN^BS(zFhhCJ$DND#fJ%jdc8-y!~-Ez?}ld=)Y
z%T71ByfKY4Gb!NM&$XpP(UZLT+zAY6_MuLitKz$1DreY1Hp89W70;P&;rxX?#Gyma
zpsg`ShMRYfqC8S2T}<(1c)Ru%vDNY%l8>>MpUc<y`u<(<4lK4SpTe5MC4<3lqb0(-
zvsgZ-@|&#PSirmrcdM6ELZXN9$U$3=D51<N^{a=}<>j>rb}KcA{38Yd&uF+gErssN
z5TywKrsnid@_(<gXDH@t?}yR-{e=K?y2S2O%F?7Vs<}Yf$I%jH0erizR>$$+7IJ(R
zXqY`|=eywusXyHUKRM0DsneY%t^>?+(%+O!)?031Y}l1|_w-SJTU<%vD0oV}Gz`iU
z(veVWJ*vitWWHY$Oc>_5k8&@~wUl@_kVhk*k%7*Sp}*fetN7mHaMxlQ(>_X`*s+CT
z8gp%#)w}%F?Gj!O$nx!j+un`9a0}&Sy8j*>1=ODpgKr_dJ1Hga2YRt$V7&zL0#1{i
z4K{z}B4W}-y7J5(xGJaJFNStq6Z#_UE)13qVzMElpK)AqMmhCM3PvNX{W1=k_Je{S
zw8X0W4$%-!<>?$H<1%kZ#&+W|#ate~SoOiy|JaZh=le#{f4LmSDp;b-c!raIa_*CT
z2|v93wfwXu?MmCuVcNl-nY64neZPO~_ps-6zjPQy{D6uJD^WF~(mU&}E~?<9aRsi*
z=Yiz-WKcU>lU*H0JM8JlwIGNe!Iv!esI@{wc~_HNzgNq=I1)5rcM1FIoxJsjNs?uX
zF*?SE@=Kv2W2E0O*YECVi>Y?*Sn@Z+G=f9)L>lopI+0by>5Xa*e;7qXG<x&vnB)^>
zg{huQTs1C%L_483XhnPCF@rsI$CrK?2yyhIx{NIr<Ak^B3=(T&quv2p6Rsz~s40CD
ztnJ+}DS*_6nUmg8%diFUrn?TQDV|jr5m!*D>u5*S{#;?3vKGBL@Z4?MKUw^BxS_kg
zFbGO?#*>U924UC#u555DmGruMBg*%;J6D~rtAPRI5v&v(V&9a*p-_8n0Av!MY7@-I
zO88(3!FZ4hPf5Lce*`7)tBh0aFFGIb*-XQWGaF4(o1Sm{(NB2Zoys`9gx)#*_S3JG
z-6dK#VSwgkS3@w;-S<wH1x7id>h2H`VhGXv#s;zuvjX)1OGep>vV?Hh(}2b>7Xvr-
zr`9Ofal_|nowHelI#-Jae5WtV+4-=z7%miF;JyD~Tfz^F0uxvp_iq1p3Y}Hgx376`
zm2N(}*^K8Dx@}J=E{Oz`{@EU5*l`pkoi0^<RVA5E<=PYWotrQ5HTs=9YWUx@J(*7Z
zCn6c5O}}8rjIEakMM04<nTaEWJDd{#t`)|&5qTDu#Z5AgzFOV?jfO5eh4}P{N8-h)
zpwF%S?~Eb0=XE}n`Dnn<k2ppdxWj7PW6m=tUoAY2u$q@_zkWUEfZ6Uwv#pE{;||;I
z?SQZ^7V|bE{+$n}eT!53)IeMMf&4bg8bT^JTeC~96|}<Uei9gub5Kp*;@i6ML4%${
zT$M-lk<>?ztt<sPAo=R{KH>W_sO-c-&s-}3bYSSC#eUs+0CpkLs)2|D|7swcT2ehB
z-pKp6Y)NhuC$VBd->HXtKwdY9$bDOA&A75jjv=R6s}qf3wsS_!bv%`vKnvsZ5)GK2
zb)Y=$!UzVURSt;`vo1tOVL4j3L==z%lk(sANHPF(-}D($Py!<WVbC=1tU*s#k_KG)
zfcGi(7Qf!^xv<vn=2vW31#2Z0!=bT^P`mu<pl8*))=Bn*$}@q6fJvdmhC;Jqu=;w?
z`GSes0>wVlf$C)WLbCJ4e92n0&~M?KB0tylet%L~WdJtz1T|d}#BSq4-GSy^rnADb
zSl)X*Vp~AoCfV5@@3aCU%CM{xDk`{9jr6g=3|l+Iima8tOF#?WYc^CGVl~4-E7|9#
zvCEbM%+*0SJ{yxRL^wSget6#o74|j=knMLb#x_*gd^-;&!~8S_IQLw5u*S2=vEap5
zQU&1y@s(%1VB`l5V6Z?D#aKYdJ79ZkhOfG>Z+^qM#^K<sW9he&OQu+~yq$mz=Js>X
zkX-j&3GIg|wE{|KB04G4wHuVszOq7V<C*LR{#Jzd;MB{EuRqSJg}Ul9R_rCDEA=;0
z5=Wzu7Tx9eFKVZ!2M}QgDV_?z)CZ=LAJ==biNI8<)1SXxyUT$7+={m)WEjCun}z<p
zl=~_g<icaLNP~*tV<VaL;*;?6O2g)X;K)ACv*aIxllXGL^XW+G99m7g_0%n`+|ZT?
zi4-J<I~f}R7jrJ}N9eCJNj3c4<C;5Oe7To*41FF*i96QJKQU!6urzbS4~-49Db0Ad
zOP=MelyHrFjeRMB)pAN)WJ{|if}S$l1~?#;#YaU&?%M{9k{gqF^v`Ncf3IIOMhqVw
z*y^j&?gqKLT`;7E`6VpPeb5TLlPIY@mk^&;F^?N{Fkb78OK-rOSA}XSdfg=QD=%d-
z6F!_tgi6eM5KWzf0UurZSGBi?g|{w=UrE+<XoWXwi%wY9@zO&5;?%Mq%_zI8dYwcv
zbH5vN-J`l3Y+&ZC1D#wy)_%428oQhfm%BLV@Jon`UbQ&67e>_{9ULaC57D%p=Iyss
za&*j;|0Z%Iz3u31W8O+@tataW{!AAORR{(~+6EWo^@YC;RxiuQv}h@FrpQ^_%L$7Z
z5jLCu%TW8nYy7#(%ZH15#NGWa|7|9}lS@GLeC;=N)%adv<Ne)3=Cz_pPEETIYzyQ|
zvYKWn&y?-rOQ`u_<uQq701Z6K7(wZp$1dTeAwK7({1EBhD(v#gHntklx>mCcIEYr*
zh*@muR-c9iHjv<%KRN=VhA9)1uzI3=O38^!hyjv1a`#2@Z4=ng+reQrHs9A1rXmcM
zA>DI#6?v3eul6FCOx)S1`f}zT5T$g?b6S+v=&02#<YDT6iupQW@j%MOP{(IJ8rP1_
zzNTF6>-^^{nPbGePCdOe_R@gIkRQ~t1i;BkqWJB+t{aSM%9(hvXb2V=7QKCHm_1!+
z^-#2luT>rQmkdgbUY{!MeVJCajQD9AnalM7-0UwL!y)OV4m3ErhRHRS+9`)f6q;K1
zk6rq@inFD-yQ>J5zi)X>;Cf!OOxqH@tEs|KrmrE|oz$|u!%7_4=Td5dOD11||9Yx(
zvJ=;}0)a!~5Ze((b|_&Zu)mB`VBSH|j=wEUg`Z)Y7@N?}A@<&dZez-0S!=pwI7GA_
z;ak=0pr7BCJ8|M89Khjar$TMWcMe#vJ8h*n?0lpgBN4H_0rx8T(yzbI5*CF_ZhZ42
zyU?-k?g+{VuXxe^CR<Ph84SS9QKrow{Cjqemsqxd9`K`6-tFDh9?qa8oKe5E{M-|9
zTFPfX=lyI&J%wq*MXT07>5ty79twniJ&^1N%e=uSs1kfl$Emg^^(NrKx>m3x>lD)F
zYf<k6+)^T!j!i%_4o4chU=Xh`X;Cr@OS@^1L_;M2y+R9BR<Zt0lxahd7S8V(Z1gym
zL@O<X_Ad%9KDOU)$cU1x(GjVwVz)~By=%m$4~;QBDj}}Ht@BEH=tUMwMbV{r%&s4S
zr-SfxiH8EzZZ|^jfIj+aOpzD@W^wO8YD}$b`GKpp$#wbYwfA?cr*v8}O*EJ~6yY;f
zE}{Yq&o%4llHXQzuBX%kjsD}VJW|xf-(sXgbemp#-RapY2MBq{Ep<%<hDayjZ4$}O
zAG~Rw&Mn%=gD4y|5kVHiZdKD%MxH=(=F~<sucOi<WfkRGA(}sAzTpAWyh~reRwz5*
z^=TiOF)==;xV=D479u^9g+;eS5d1(f`SBs;Ip=o+O%W~as>>lPwagf*x%O<IZPZs8
znW4Ua8<p91XE9D~lV3BDb=@4|_|o|5^A~>d-JlDPdv9NwL(!-6?3DMz9HKXc)gR76
zW2AQ0=d4FRPDi8S$oH8p%Q;NDU-rLfve$GTO~^hyLC*JU!x!!QC4omLbtR+Hz=CH{
z{YaTv=h$`Ef*rwFn@eRyFEH=X^56WF9vI~`AQ#}N4qtU0jfA)#QN;CyC@IM5$?8%{
z7#j6${qX<pDqE;400T|_GvUOHW<$8Yt?bIDoh$nG$!#n>``#LODfXUUz`;!mf_q$7
z#ts<cjFQde7O2Z)-9htgpzf~@Q?$Z4Tk3{_Y*J(H0-)2{*vhH}gmp}%&bh0u9pbej
z*RlkoHE{Rr(C%di<s*7Yd&}W8N8$S^_om_olUZQ?!s|vFs#P8B$hXgdXPV0t1jYb!
z!2L4=XR-H*qPz6R7h;_itkQ#oBZK{j3!3sB-S<}}KdNKzk7@7q+pk-X0Rhlun;4{9
zaiI^n@a<v;jTAjM+C%g^bjduiC50Y?s9+l?V7nv#ZglOtCD_o$W=oP|!KsFcDJKpV
zN|guPv*pg{^mHv19ukNzJe_(6G#YxhkQx@0=V!SF{+YG_h7Wh@XGd-ye|btHUTBSO
z@-HI5u27;G7;!NFqUApC5+ld^C9$(DKu`Sr<7a~K_y|&0xF;7w9MNTeJ3<RN?FSh#
zZzG$%G?RY^eOL<ksF@@4QUnbMIKFzA%==m*;x}iX(y(fWP{2ph2jQKjqsk^b_H}&K
zMyE9d?TzUSM4Sbg)RlW=a+RT(_=<FpGte!%`W0RBmB}-A)7txBta7{!az#FJ8aul`
zvCLPpzVq`@I0942(4<x*nyXoHqdCGF^!I{%EuYU|BQ!c@`j6Rj&5}$1d2+{gqQz3>
z7DL2oo<$+dW8{0`iU(T;x=kPB^CXFE!lM|9Xo|@M^F|Hn_3bU}Kj<Iax%YW_;Ucji
zNx?S?@FS42<a}HM?AsYSgUWmfRXchm+Az*%RI^6z7>6A-;{K8WgHfyn;=2*B$ShhU
zXoOzY(@Sv(jx9txQfJa<*$XA@kIkU4FUx;m%^+%9LWlWT`AM+<l9Rxtnmi^Ksr~Tz
z5?f}tfnkH`YS~WXc-g5!y;EPDj6!ULA6UKX>eYmj-rg~q@MnZ&kBIn;CViBB{N{w>
z7DjPdripBC!~Y1VqK0HF=^YN5BQ{_K?LRMB#hFx={A<UQ?)VX%&eU9Gy&Ek+^%}tv
zhnD5aVA>JgjVjjtZ&XRV;%0d4N;LjEjNZ&41GUmzHyQ-P!iMim@9N6kqbnXwy@Dj;
zd(PwRzPV=j;c<wFyR91V;X0O<2KgU4h}zOtsVB%Et9Q?)aK0kE{#bxGefnOO=M-c1
zkiV?Y??cBSuA%Yh{N<az&!1f+r0EVFwU_+rAsMuhhos?39@p>7$H%kI)3-6g8Fin@
z{GP>t*{=8om7>*v3rIIdv*aKbOz@1$o+-lRw}7VtU;d!iua>I4?=Ys`vEcrlY24=;
zq~MZ_FSK^%E&p%f0Vrb^j&Ozs6S`DTWgb7gWajclll))Iy=7e0UAHz$cS<)1f~0ge
zg0xD5bT^Aq=?0M&>5}f2Zj^?Fv@}SIgmm-H<$XV~pLg$b_W5?cto0LQt@$5w&Jov$
z>wOykF3Vf$Ggr(?(Jr#`ZQy8g9}JUhUn{|J3a8#7K9<wzm3t$Lkxu9aGAUfcq0xL4
zYF|~>Xja9IRi8LcmhJhFE#TYX&B;i9nQ0G*k7{a{gU7%x!H10DZ?bDlEKR(E=q3ye
z?tZD}H2GyN_<Q>-2Qh*|SwxGX*Nx<8f|Ci^WC~7D<a=XNZMGH|*RU$O8?BPZZX`~S
z*7(e1H0IgkT|zkS=F4paSN_fNS<LVC#OITUs|XA4X0fxxpqt-6<?frD^liE^efN5c
zwO{FrHrxDoh^%{o$}L$xDZwd>qtgzKmkcMCMDX}boVM!o-Z|VmKk4p;?6<gVJq6#R
z<K`Yz{T6SGd&3WGp5Ax2qDwW}ZvcjVThCxc`ix_Sr$YA0m#(L@Uq`Po{Z$(fhP6?H
zJ*Z>D4{aVyPa-u!Neu^35yRrjQdc4mL*c@4v65pV_jxMHRUw-W2J>N~c8=Mp`dJrM
zAyCuix$QibpwoDhwJ7*obg0X#zVz%=sQU*5*H$DeBxU{QjTKdUoqDs_dV&-)PJuka
zJDI)b4-D+7Rwg%6M|`KhnApz<U$^xhJ~mn=nBnDc=+yNSeUUR14gD-|$QgpmAs$dW
z1kTWM3X(;Vg-{wgbgmQIih|}$f5Xc0WyMZ<mP#Y8c>g8b3Iv_n^KkiQe<SD-f@)yD
zqzmL{sIU=@g;=pa5t`c`+sZ1@)zHpXD<EoMMJ-wxPM9Ee4T53a%x~xgYbl5Zm6H&^
z1aHs%U|hL!!c9dgu~#;4w!ddy<8T#*GMcU-5t|z)g3&0SB2=GHeE8b#Q4VGtaW86q
zUsy!sls!)YD4*6?*BNIwsb3h=FYG>Z@v*F+ueZacJf2XxOFZajC43I+q3GjNkase=
zKYCBztNE+}B?ATP)&o_wR0OYgfZ=*x?KuU6pWHZ_bIehkY*X>@>-CxaLe&wqWp|xq
zm23mi!D#K+R3GO{E3>)+FNj~yST%e0K05U64bJ+v?`Sj0Dhx{$=OZtQ?P42(_S=-I
zJo2?v@G+FI?em$07aC_RYpWtSq&EkhK|{CoHBD!-d%A}g4X1ZMe=}ZX4<mHb>c;{4
zPfN*Knrd`SAR1ZaFmi5WgvA^D_a|irioS5(n|iKX#`vsV_)kaRl+CDRDtyY&p7pQI
zZZoV2%Rzcaw_b^rq^1kGrPScl{>}~tpZ{tm`GDyOq#jLGBh&)7D|Phrz60{19)-4M
zg{{GT7>w?8|FGs7$aB|<9bTto%$_$92{6{)-33&K#<Sl6=m!xVBi~2Oz5O_UWk%6e
zxE{9khCOL-XF`KE?hEu^IacFH2FeMC!t5yAJKU!ii@kCXIaDqgLnqP7Nasi)8q$q3
zxmuuJk_0!Y-4*F51*R4%k*&Pn_1w6eU5{r8RKS#t!XIT*0hEoCNefKb%tt=$d;4GJ
z2Dad*3&lV70%cXNdW7F4V-^r+7V-T0x`~;)8?nj_y3q_{INv|NcL+3R)G$nQ)60}W
zdyq|#O(er37Oz8q!X==aTip@Bvsq|N^n5dM^`fz9I}jV0iVTddDl83vM5jI2G#G+Y
zUPBo?zEXvp5q7J*`C{R0MJ)LftpU-<itmvyGwIzGDZirwKFcW|995~n?2DJ1mnM!v
z(KGzEt(03_9rMdcG+YW@#UTO^e%ubLguz5=g>Evnubc{)P;xrTY3M}EcV+0a8Yjn^
z@5u*cugV{|bOnp`f@)zpfmCJFkyuWx;F^`;`D_2nW&5>sNUh^g%c~@+40`9P0vH<f
zus{Yo1tGxk&fZ-SXJvxDZ{vA?O27HN+jdPg0);^dn3;pGkB`KTXdV$n@4#^6s)AbP
z6ohBvON{P>_6z-BN6U;Jvem&ZSUV+@v6~7TH1w>ocTXse4&CC%@YdZyBV!ktL19z5
z8EB-hh^NdVNaw=JG>(kA>~2gHp_myeCI9$A7j5EKa9xtBd2>tiYIc`^j#I|69dSB7
z?bTH#tzZjgbA4wJ?xxA-gv;dD{`)^#r=6msh>}NXHI^!xpkOq{+pg2}1Wr_Gdf{Fd
zJ5@;U=|?+p8og>=2tQthU;$~B#PxCS6n32i%MMqZ7A+_*9A9I%p8?g`CTVfoq%ncD
z+sS>_9hk6iV$~YaO1bF%LAPv0PeEV>V|Tuz{M_ixlVKYzY*`eY{C7C$fjot-!#t}c
z*i#H>50ks@5C-nm^D$gXZ?V%j0!c$JH&D~LvU#P*jtk3Wsq2_th=mJvH+tBkLS!X3
z-5O^bKDw$8igMvT%)(nlUWgo5i30`rkqbNq@WpRc7;_4E7>Mo<!m%?6(tqo<zS$gF
zt#*$qy7v7jkp*Yn8&&Oqq%};yPeGSaNk1k)XZSqWPT0WoOsAzd*KSYKzni<C{j0tp
z$@F6roZl$WMw2|VjfA>q_)BhjcbI((!;3rJwn+;g6S@i__d`p^p1&EcCV(jnLD%oj
zW(`;s(a3PF2eet_0&Z9d`DuHKH?Lr66)uogAw+XKwhG`uH8$b9zn!QOF6bN>@}Ip&
zN@j|@$AG-soos>hKlpmg(|4MZ9S9PjqZg8a*-1_hy{jd}ul1h;8v-iJ&vFZ~i`JU&
zE6ZMZvZ8_6s2azix~L;u`VwS6pKtAY72!RD^j38(OmhjpOnDVIaMd*Jg=j_dWrGi-
zHEQs8=NtS3^j|q}C~bK*!EL!xd6Ia1bL?prL(1L)IrUIQa9$co?Su@x-GoPboBMxo
zp5h~Tt!{aRoM+5jjW}~zy?QSA1v*r+xrJoqoq?>8Tsxsf^N7+u5S8!DB=k56{P?CV
zB>$!X*}@H5BKZB-DPj?6ImORdpVqouP8K{+*Wa7nSYvma-r_Qcar=oVo#={r93D0=
zhr^Si5gueLSi<Vyg)jO9zC7u35;1W}sb=q%+w}_m4B=VBdSWiPreXh>G^bzG$ZW-&
z);gQ@{lIM#U6W<6lPcP7Bt(@e>&h=$1*fuy;dJy9Ig`wTS+rrt-q2EReWYO9Ts+SK
zq!E7oUg;;oc-xM)iFI6;I8eS&^(FMMIepPRs~^6hf;Fb*N(`%3bwbf8Ya0_+&D0+C
zZykiIqgTX})3z|yWz60Zz(0_&6|Hny)B#oQu@lvg4WJKx*tg*QaFeKzeJ4*seN0%b
z_L!oy4__nU%Vtv4<(tq?kbv5UjDFA0JEy+GsS{~ovB%)4HD9BBN*}oJ_?1OgkHKCR
znjSH_N=|_sJFs^DeuO83VR-gS?++ZBvkKb}&b3@j_z^UC@423aS72<in-<|`(_``+
z8hW?kib^C7|K`uJN%>{OgU%0n(E@L%kjX^_-lVP#t4rMq%@OvGZpPSg&R9`!%d0GP
zrH*jAq{+l~&?`UG-+C~}7c=FH4!xiaT8YdO2t`@Kgqy_3@<HGwMl*SQ#grC9@#dp}
zcE&FGmy;b)#;?b0%F@X_zNDVk1_iIn*P90*URrO59jMlX$1vey_UF5>ht_qNE`7e{
zAZ_Jdbjm`ojY24GhXs0f`TIPV9<dtb`ebo#&DWtlFGR6Ho1#P&;4!GpT(&DC6lUU5
zxkEV5s60R?enBf}*3Z<zdVv?*Lxx6vYtZ44jkMePNZ;JzdFarbC95)l@;F`Jf%+@u
z&da-H^f$Dnimp`kQK}B&!`Zn}yJx!>?VKDUF*i<4KGC2l&Y@J<ZRxr1G;~|zq2HVQ
ztmB)V0;>%GWAgZfHNtWfj>-9DT+Q(5m-K}rL$ZyDp;bho>u;s0rOBRalU-ZNzu&4}
zsu)<)F>w*_k7pu0vWi@arrWMDv$%g0K{Qgqzj@Gz(8e#o9jxsUvQ%XxEL+4R6XVV2
zsG}>nLITIe?`XT%(Uu`Y`&x9ZY^LeZT~xp|)9jNtQWDhe@>m>V7__R66iyNA$6k33
zuvN)r8_(8xLbUVE(TeP0yUjA+j-z#nf7AllxIZkjI$i9!o*1$65~tLQVH1k<4($ke
zcRl_w%fiX$N(2>Hi>|XhO2kTaTqviw_wNRe>&(Wro0@(ZE#mCd@J%|zfDEf4yW34~
zwvK{mWw_2YnIO>ChBL8hAA!E^-g@kAXg5{o9+~!yc=q>}_u`A|+z-?QHpty=ujQ`p
zx_x|Z)5Bh^e*b0ErE3puncvp~O3spvyemaQvR_s`PX=2Oo=Fh%^cuv~a0J<5&1wPi
zPIrvCo=+AnZ+?wq_x?=xS<%9n3c}D<eIFxLyAp_O)wMukV0Zon{kyZNX(lf29l{Lf
z;+&f{>YTDON^rrr0j2x|wmYruPIa6b?YNy)-oA*&%+#m{EK!mU>##i!`d5eTXMY~?
zo(>{92~el~VT<OTBbA!U)*mDI*ax7Rode&#7!*q83HaUI;GpQ+s6R4%-I>rEe7U6G
zT|vN)g=iTvs5%v~g%&k@V1YK;OCM&z8Qs=e;X@>SMaDk_xsptEEJB3QntVivGZdkB
zU(Rl>@P)!k$8%xD;*RRDVsT40z0Q;fhMDYU*ibNz3_+^;Wl#`4>QsEviJ%4=DUcaH
z8!|qIm6|JxkNvaFsqeqkum5u>50K;2jhP4%e}v{sR=Bv+2k^|GEc~y6WYBHbg<c7!
z`kYME5~c5Wb+-M<QY|nGV&0qkj8!&MDvGea85SWtiYS-XY)~lr$#3_j`48kNl|OP5
z%Fos11{*;krcsTVSIs@IxZY-Bn8P<S#kH#W+)7gnJ1?u-w+s}Ji^~YN8xw(Y`6`T`
zm)Tn28M3uBt}(;@G=r4<cXjD(H}O5eGYUIa7L)1Uf@)FcKPF8n6t4fOd}#Khlo9mq
zCfOkmFYG7!h<&dQoV<RkEtvyrVLm{p7i(aVoi@lqhi2+zQof*Z6x4wc{3!I1Mgj?7
z%87{yP{PIrP?p$cft7&qJA~jYz*@y%^l2-w*{@C3`wKWw@s`$9hAd%LmBGFt*kFKx
zM6;t&j)aZY_rG1pd3T1WnAU&OeAuW{Dv=M*ku^VAkQoP4HZCz)yxZDA2S41pM~&;F
zVuONYX9Y1&DOp$_c3#eC^{eQiQakn0lo79&0rrqD<K0iEcVp5M!-%Qw{LY|=ttBki
zKPb@f(6U2`J&E#IR8o0m@$mC>B+GQjsGen1j|e7q2uEAE+4H<dhR1(V;ug2m@`+Z~
zvob*PG5nof288=bOw#(J%BAzOOK}N>rM9iiTBwh}`Q84Umy$Ad+^7gczxu`bV31Y5
z=NKu#xQ?UyOWEHm)m_}UoD6#~YYU=WrzaFvQ)QSKu#kK}eKGmTw-}*DL`c)HhtzUU
z+~LfxP*b5e%Hx-i$I|fb5ca?i@nz3P$3xG*`>s{Hf;nA3$j((1+N{q!-l)u77+I_7
zH)}F8kQO+HiPdh)hd#tm=DoGKR#PjJG}Mg|GKf)pJ)_OWP!_C(b1dqz8R(6~E=L>i
z+3wn@MQyvGib1NfL7Qnn9nMKVhG*+Z>TM!D{>dkgu#xI*Yp;9{=VE;ZIh;)KCs~sB
zzjbKNHx-ttnAAcB@((IsJ+&nna{5fs&NUmkm2cUT?<@_k&_-*ym_#E=|LTKUKI-(l
zsAiv5s0dZf)lV?p{ac7iSSl;>$?6mBnjKYm4N00J#K#q{_XLnn=62FPUWuKnNGt-E
zEE5q5>*)xZhVHD`TP#0iR)3uJ3q{|Ph@-HOf+V5D-<qI4^Mmv!T?6=nCHQX&kr8q2
zd>BeoCN1kDc8c=%bF%AUdxh((LSYuNSx<GbI^%IQ!r^vGmp|kiT<SD6yTRM=y{^QY
z``){(vaP38urD9y!@M<Sm44I6Dl2|_8m!!0UAAyLK^Lr2AW!d4dAH!yE9LRP%vP`Y
z(+!ptY3X|dR9Zg#K5E|cs<IFnyYm4J#34_Fll7taXQ_T2&o)okoH`d?bY^S8c?8Qw
z$?YXJU9|0b-v=~B_UH?t1lCT#X{py2%3}uWKR|K0p`>3l$2`yAT8T7{HCxwlD5b=G
zfx6N^x}=lBVCE~qjOobygn2t=YWm{Z6V9lufV%$$(cW3A^ARx|4gBox_XDNQ?jOj<
zsdr}C5~to|yW9wPPR^B21Gu2Wb?QAj;iFmF&Ch3@q(y5dlJv%A>rt=pLk+{gTy#19
zTgL|PZ}lGoymzD1`QT#u%-2A2@TFvlv7_c{j#r6uwY@v2$o>?Ogfn%3vHc}$`Oy@P
z4{YCj2w%LxeUIR|ccNl>n|WQ3wGVo6M&V(^gTqpGB<L%@7#5Tr;^v2>%l&S>7dpA?
zFa0GHAWA-PePb(U=;QZ&K}V(&KYCL>)8G{xoFJme-P&~Fd#nt;Ek^o&@{rP22`<nP
z!R;E}f0!tJ5p8C=aPNURhVMPy6&tE`5{+tohQE<+01rYsYx}H4?}=2J+IYKk*=Ney
zK7XF(T*Fr$!;)EyaQ3V#$q&=L-l%+Vj>P{o#&u48cWapT!E|e=os@hB1Nv*BX_OnT
zYi8BBrA1Y=_wl&HxUZ|X@X;J2f@o8Sd2^`k9sZ}iQ8<-0_`xDsX?t<qC~sS&;C++i
z{p?h?)kO@0*^AjX{tzHNp3V;t2R7ddw3%6*b>C|=@&}W%i7D;D#%?I8OU{a_U4R8+
zbF$Ragdp`wKi(8Y&XCVGhB7eTVa6cAd}(RI1@+Sg9Nf47M1Rwl!r6zc(yjm3ZfP_R
zssVt*n$#p|?pz+F?Gs8Op9Bc||FE6$RhO*HXjdp+c3nQ1XcJ@oq{(+W3<(4&-}EZQ
zq+U|=75iQ&p@-$~muugz@b>AutA)_!&2uqJ<*G@WHm}=iPNDA^Z;m)R2@#Uh@J!|L
z+$5wQ?fSG}a+K?MRJtoL!}n>hbc@c0m9h?TuwL7v9)5j|SHUA(Qs14hi2>dIM!wjj
z+`RUS*LZXfPRKrHGD095u(_O(N`qiWftD8w%#525#N7qW9U0qIwZ|I=i(Rwq9MM%q
zj+Mr6YBPgwH3q(F(`41GGK~eSf?k-A@+S-C6%Yq{+V8w@U-~v_px;tN1SwFTeK$&?
z<eUFCB7iAO4gbyav+vjtO4p@$M4hWc<_Q(5_F`-_`Z&J-usaKJx@Jge86qwD{26@0
zI%04Fx7%bU!GdG&$VTfpU2SD$;QwBn;51_~!CIQpOWkZD<3P3WohSUK0>|(h?p!Y+
z4;<Q1x@h|d&xEA9u+fzFiOb6`dV@K+^{c2T<!ik@k|g(F6*c2KvNgwR(c5!ozcAbh
zI_#zHFW#6Vxi-tJxaw!EH@+u}XWcVtoWvV>jQhzikcZleTl!J?#6YNRZ=k;W<*HwF
zCB>?x9d7bOn=M^R__rQQNNTmPh0MCJ8GgVM_;A5OIy8#>`3Sm{d*pR9RW1>u=sv1+
z=VcqXaM`1+xYIWpywSCN@SCscKAvVPX^N5Y4~kkVZywC)cg@Nq&n&MD*JhVYcbzrl
zKU0Bn&LnC&!(l?}A2N|0=W{j8s|kKL#rrVV@j6|(M~8$ic(Q)@#D~DlhsIH4llK~l
zqb?DxFE3~$u$KE<(fm5E4?6S?8zMK8%_>L{^ALM!I~GjI0Xm$+UU*WU`4g^R(+8C|
z7@rWXB&j5w>+Cw1Kh3Bv=S?0oIbH26Wx`s>twIT4Q;mZ)n|XWX5v<b3rGqUWK5M<f
zG>OVLJx;9}mOz}wYy5urh238eOaibEc5q|DA7(=`7kU3P=|<q#l<Vv0zFpEuw31L8
zsY|2rvQ0Kmso;a31g(H@A$+F;<<}J}rNyx9X6jmGc2b$|CG97|4CBh|m`gN*{Is9P
z*i7q7tnX=WI92D2?de~h_)A9jpeGlWW4lz;`P(^qp4aFG@(`n6pedY_^~!5g$`Bga
zXmzhE<RV6dUuVgoF@gwip^3i~m&)sx74w4WF&DIJHy-<`dDck#JiQnGkDqhfPhTOi
z>md1i4C|MIx>C8=OT^BH*<`Gcnon@oH;8AeH?<SzJ{9=WL9RLxRioN3@ciw*Rxcs0
zY-0<qHLXP9U_dF|;3u-LG~s<Nj!V<$QfLUjq1sBkD>6XRhi`j*#aQh->ND~D9JKvz
z`MUzCZhoqB6&8FoCxTDQT<*e15kJ+jloL$YNSM}tqyJ`9)LQyMIeipfl;p4bszot>
zP5#!n=*lFxlt$KUB05r6@1}-gfrOCVGXG0HSKGGshXb8CUGkC3$Q-=FbM^30&7jf<
zp~+0Sd<Ee}yl@+7Zrzth6*Rc;a2W{=aXMK8*B;7R-IU?Jq?_L<;P$fEmE@(oI~T07
zx*X1^T^gE3ld2qt4+>)=?>|TW($`Zpo`35vTJOa5T(~v)!tZV2xTUDQ$ji3l-<Qw0
z87{XD(h`^-f58jYBMmA$IaQ6vEH^4|JlUDBk?TwGqIlmfLTg2FoUn*;`^-VeBHhtm
zsY<-|bzw3@89^j8-=$*GTGXGcE2ZHIy8lwuHb)-e+=9B%v!CJ_yC1wfD|V9cD6@^-
zsn`8`>2GJ^C#Hh?ek<ejl6iN3on<O3oMzA?<C^a;ezT#-zsT&5v5n{s*E3adTi|af
z;uEjdvDLm)=;fsas+>G61z~}!s!qO=nVYcSF$3Qh!jwoq61cb)5tf#G6CHDGxB{c-
z%#qmpAs-28!ET|M_puA1*R}XXO#;!S4?g^;e0JnL)63aEcp*lY<z>b<5~V=aFzSf2
zu!V;4urY?NmdlHrNh&u3H(>tC{$!<9Y-Cn_S)U@*(X!6R(rjblEM1Y$PDae^c;obE
zW5wE`7V3F5V4K+=o*2!j0FNN#gG}#q18$>-9eFs<xLu)BX~{or$$uz6G$<uQ2hNlf
zEOhy1`oE9?tKfudXVahmJ_jcD!v01E#(Wt&Sc!@oh;VgCf)sa(Cj_iokt;)zBjv+C
z4&dSc?Er$Z=OAKGz^(>jh7b*3vA1}H-@-a*-<5^+#{pH6LEq%IhWAI61ke1XN<fG~
z38?^Pkn;0u*o%(vWGsxLFYjo#E9et=aAYZSv8U1C+We#($fpRC|8s3x|L<!9YUe-o
zjAw_!B1F0bBV1}EnB$7}aF@;{&tLq-mORIz;8Tq4x(R{RQ?LCr-{yuu9x>GPQUF_^
zmS=z$cCFY@Pp6EBHAmlOhgd>Ub`DLvNCCxf3$HMz1WY6PK2U4<odzCbPk^~D7mfv{
z5x-9T504>5{<FZBXyf>lX`QC@L!jGJD@wo%$1|<)EX519u=BZ{%z&3Lk2<&X?5EKm
zfoA@90@c0X&<gs1yq>VxlkXBoD)jcM)w^atxJ=kvE7VOn7MQbvwPReRT5jZxk;F&l
zM`wi0uO4S<uzYxq(YKzE+GDytle@J(5c15tFI<E9gG5a0L`fd6>f^^-9Qq%_=T!&N
z;LOmgFU=>h^Ea#nhxL+T5)1~43r6#5N!ItK6pl7R*AGGeAH6z+*u3i$BBc#nn?Q~x
zYEBB^+UPyU2SyjjK}+GU+w@;g66R>n!)?6<)|VWnZ);y?C8`pcJregb+!43vz$($!
za4b`kzx%xM4y%Ob$p%KBeAsWVjriC3c{V$NHNYIqe*DPtW}4$6K||(;KDh64Z#d{x
zh6rPAMAY*f!x7YSts`68xfKfb1s}P3B?>$&gBMK;9L;JfNoJFP^m?~r-GljhV0Xm`
z9vT4k8*xJPdg<mQV9vviIEsF=PCd;DLsZN6gA>&@U@T8V0CZ@v$hq1j93_%EFt+lt
zB&M|$aEiA~Pf<@ik5eC@7)IUR=4k})+S?~_AB{Dy9d*sE)vP2wWhP3S!Mbbed1kTQ
zYfd&(7pV~*6IUv?$JKF<dUl|dU*(qB_&o7c;Uig#+Pa5*nuHNGS&T!&m+&)&6^gFe
zB6g;cj#*xfiDRMc6*kchR$-QVSn;Eup6&BH811c}+oWO6s{q_g3bvRwF3x7zZ?n%b
zz>Udq9H0_1LAJut18LaRCY@0u17+%Rqm!C0Y(!cw;=^6t#_qdspNf8oxf`bXB%urf
zCT}CW9WgKv-NShCtw*u0vj_QVsuR8PM4N-?YgH7exOwO7r=~Cq(QW~vbPHc`Fq(1n
z;C=L<A5aa)S&4J;elUwqLsgfw4Xb2xtMvN}HSE&;T+v#)3FtJXc0mJyD2W~3r&(8j
zt6)QrkR*#|iaDvIVp7LfY$Eo6Zo<D#kW=O154DdDHH%zf!?_%(2>TMbw>bSM-ziG4
z$>zv6#m!8Ytm!h(xvY9D{Pp9FRyL_+BrDueAvOtnZAjwOEZCd>QAptKkG0*6344L}
z<|Ak-xZ|khfMDuSzf2Atc^$QrLz+jckzxDi%etx?lO>a;b~U<Vr%)|%%tkG|?4-Tk
zz4$JwahG(iRvsc`xV!IFMZbdR%O`?;YBr}aT&`|p;}5ftk*J+?l;sffe2#{G4|UGU
zX-w${i`oQ1%33PlC9x`P3PgXlb^_aTi}mj25|Dx2@V%bHB1#u>=y7XZg^@x+^ogj{
zVH{$_)**F%elV|<h6SY<%sP;IAWRbG(d^9Sfwh{vVp<^I`Ig~|=IB+;fV<q*Y^QFR
za-9G3`A5l=7E^JDSu0rH!@p_?h`7-^w5UZ{1>ox2P*L@~13tG}mo?7X{q$uHFG<C8
zVt#U-`-B+Bm}0K@vs)3K-Ga?j#AcG#28Lpr)0-_*l(@PGz)GR_JY!@7?DItJ!v(O*
z#Ez^vYtA{a_@vQR*wP5*o0JAN`*Fg4hm~;ah6M<gA8<=9Ry8x08as+*vV^H-jUGM5
zni6$B%Ms;1$J4jqs-cDqw~Y-Xy6orJD>vWp%s3FhP1cAQ`WH~vF9PoD`TVhfO9ilL
zf2&qd(#M$pny?2fBjQI-%#UIqy&pYqzD)w@SyvOnUTVlnkgX6csUy=BLdKNA{Twsh
zeu0n!r0U;al5qu_RzdT1!ay@X3HB`NbmTBGN0sik3IR&zZ>fK*n+4CLh;8ox**ZVP
zv6tQlKZiRQeBpOGxzzoKBd39JO#C4@LB1&4H@Df~6a9*cd<0d%I#pj)Z&Sj;lJXXq
z#Q+r7dEjd>i*Gy+X<<d4f18`YT9~Z@>>uFE3z+11VFry{d(p!clmmR?`)@22s0IGZ
z%z;{LU?a_7>&Zkc>3js6_Eo%s1m1*95;&q>{@?Z$Jl@~Nxl1>CeLPANEYlMQMvM&%
zYwRDxs`K9%*1+1otuhAG1(@_Qn6z9aVe8%U4}#tDPd%W=`#&w}|FBM&mPMt^18+bk
z{~N);7WV@7Y(guvuxI;wbKQ#QU}a?T8#6(uQt*L14keO}%!fLc3ak7KR52_okC8u@
z6yzo<q3$vve-`x53Z8TX)ZJj~2X~jDMCq5>&-d6}U>n|KSIVP~{5RS8Lzqb7nrnnB
z%fN#bY=!+ZZjpm2Przd<5BnRn%hQ#Ak&ee=f^}_3^Dg%M2BLAjc_7z=1Y2mzpQHG1
z3;h=hw<HJn_(aSxH<CUWecEp&QX&GFAT40K{_n2IzcGV1!K&pro=zB0Rs!kuJu%Tx
zh9PN*y8p4!jsIdRUx(%Qn($QKIJd%fw<Gu^7x)2=p5^~P9_B3P5tR^~^Y0J_MnB~<
ziffaI2ojFLkO-1TM(wTQ=dbZ^^#%Q;j(XOQn<^pOD+{w4LOwsijI1X=CLn+mo*`?W
zk^zVwS;5*5<}pbC2Fe_mZ$4HvDE#anJo2<Pgomht^DRCd-sf52YDMyNO$SXP3{Fe;
z@L7m0I0OALy<}rox7A4>S+u!>{DakuI{3zKDn`F7=6fg9NZ||a96BoqdFTyla>Q@^
z#yJpqxZElsM}095Clp<cV#dL`b;ma3tM>@E&8=Zn&)%>_v%s52eZQxfkeNKG^lqAc
zmbSKcBuTN3MSk?U*>c-#=D^G1msNb;xr~5<>6A;#S%<?HoD@OxX3?vkZ74sQ?titf
zCAz<|*h*{zQ^wdHDUu|GvRCp}u?g+ud0)ohyPO+2#^*fx;mSv|pSQv`Grkior2&ox
zUeDR-3k;~Y=8N+f7=yg*km9cBKe*xp`d@a^tk_RCBG)($c+xA5UbiUbrxJ}L&%x*g
zuB=-xn|j<;@;nMlx!*O|-sz=C*o0piaCXfugh!aKjdl!ml3ZCeC9id)vpjn6exPpg
zm15QA1K$;UvCdh9T=gUoVa4*$sR_RMI*XV{Dp8?V5dPz9510L4LbVcU*P|5?Ri1ak
zDFQatgpZSrFuoFVRz=?L-xrFj<>^qKWw2U}rpE(<w2iBXqW>>X1dO~Vho-p4Z`R@$
zZLPmTuZtYx=Ufc3>?V+1LR5Mbdxs4C+S!BNF8gw-;_M3%Ca&1ut4emn^ideA*C_wS
zT49TIMYn>;X|MVhUMWh!vGY51&W2!;tk-IXDu%t9li!o$`+93NLlBq+=)OsM+vL#F
zNLEYST1TimBvU`zF_bs+60~sKW^S|R4<`0cCB5v@tuUX+2rL7dh8A;wXcZjOoXmX+
z6`&X*3Z|x;N`Mmjb3fy<;ktAqb}RTf9)^D1`0SuxD_h_rG9($jQ9DB#dvrK-E`Nt$
zbJ+=qX%`zq?e*Ja_az%GyYv50S4j+oul+hVY8X1wEmgS>*3sr~ldfp4&mO>hee{(%
z<!a=)r6G|WL#lJSX!LT_#ehFaC`pCxMMNG$A*rMciH81Icalm5N2w2*%tkVz5J{+d
zMTl7plqFh#(NFx+eb#9wI~6PB*8S3{#{G+GIAUXo4$n(+L)UylR?W%C-HR~-6b=W&
z>y~s`oGKQyVtZFJOafs$89sC#fmK1ThQ0XUqx`%*l^OXI40J<wfyLm-9QT>c8skAV
zo5_+)=M_-kGQHcCmM6_OnhmPk%<!O>D-no!#@T@=Rn2$5gwZI-=N&pv)ouq_Hgk{r
z$IgBmb#Trta39LWMLaZW7m~IGJ+KbVb%(Y%`xMNALkafH>aj4zY~BUf>;id=CMSGy
zVa~$X=Q&~Re`*45LLFh(e0%-l7I;NsjdgQNEDO=bt*QZsdfy(#OPgWQrqwf?Ufp$!
zv<nf0`pt|Ae#r=jDbZjXRYTKGqU^+m75kwl^X5lYG$Ebka&09p)(80U1ZD8XYS+}|
zUlD|6s-yvacD47?Wx`-a=`Il2d?1qO`#}}_lRENg&TRkvD96U}@ccaWgL?S12;^76
zer~(?PpO3<!;xV^P_%QD4t{+wdzt+-btdk(c*|n!vxUE9&-z)PXR0&4bcT?-Tm_2Y
z`huU~P3gyH`~2NZ^AZiE%Q+Q<?;{fUFsBv735EMPl&5~00GhzO5fsV!X5X-dv#?B*
zRu4od>FWWZ)5eB~o$*^M?L~C>H;zkD?&XZtA(*K#(bUQxOb(*?cYVG%Pz@jWUU)VO
z9qjv@MW?4%?98+LuLbNvn9S=I8o)1vY{mbw1v}fU=C<ozK)GbPm|^jty$j8p%1U5y
zicF}dzW<I2bsS8MD0rXv*wi0-@uc7_HZ-)Iha4I0NdBJb>fO4+D^%fZ(jRY^k8c@H
z7d+G<v^)^4az~;8j=mIkA^gWKz5%J2eFJ8T$4T>#a5ulQBxiNpMWsJO1MSvSI!cxm
zAL`+}>pw7b5ghxAo%KozAy=IVb*9?AV>2N(aG%j%W}r%U|23ucldq+Fcx_9zV=x^Z
z6JF%n!)>l9Yqnx#xUnrZy~E8ka6x$6a<<kzYe38;VD}%j0M3cUYgqHRzCMj)N<RJ)
z`&p}7_U&@PITP)-1O5V-yNyHL#Y5&j>3mHbp1`AXsW6HYC4gC7gZ~H9tAxD0R6J7_
ztx@_)=$ROv2EGoy_pH9U(&|l)($BlCYo*l-X|hf-k*#V+zhA^_MLCF(=u0VZORpfk
zNXPe=*LY1O4u}NN*7MV+^JC{&h>&E-C2h!S^ZS=%cT49kX7{RUxxRHnohcHVE{q$I
z-tQNTDzamKofmQ)TM*FZmX~;Jp6C*UdKjOZ2o6RMK~-8t-h!deXccHf?{nY5XIpIO
z5#>L0fmVCwyQj~JIxJ*Hb5#TtvJ_^j!f8W#2h-~Z85e!94n{E2slG>*l+Z*gq3Z8T
zSiSDf2n~vMln<SBF~s6?FK=x2?UB3u-O{wFpTo*o2*{&Vi6-`Bfcsqgq9}973>>(8
zNb{>n5Zbc+Ow?y8!zlVoZ9WY~XhN`p;P~90$rF&vSD8>mAb$&=-rv+3ui8ieOp_Mt
zOF8uAFL$pPn?%3x()eCF&bHk8&#bUrUMO51Md34kJu2bH9*=49Sky}4_AV&iCq->s
z!vPdbqO`dZna0w5ZGfg+RKKFBUVqW`E*bO7gRmRdywATDG*Vw%r;C$x%XhPY%Hi_R
z`&|)4M09?oL}ohF*cpf0L6w~qz|`gd7dG!ZA0lckg4OuBLIM*?YN@c@Q;$8bO!>hJ
z7@CFT=S(RtW?qZw4Mo1J<#A8P_jOoqRJsB~eTC|;M>E?cmG30*jbkcAgzqYlCN1`b
zXDnTJn-IFvy~&8uH=eRD@@v4th=iji!HU3I{dWZ|PxFCqm*X<)KK@UabdBf&9xvYz
zQxN*|?SbHYSnSYPma-(;D&Kgc%B>rAa!>?hcyk4qNF)iKj;yftL$4KXR<s5+u6SGr
zR7#B{VJMn|xc8lFv5V}ZrpwIFkQeor@rnH<mv$;en?0B$DuUT|Zy(0*9)Dj7<iRXU
z#>;B@^8H4P<dsy-1AkNC<`j&Ee2P4bgApk;hiXD^z%WT<d)}4Erg>i}I`|4H6GBAe
z$LoV0HmZ`sgUK}Zg91D}!GPcoD)=JpO(e9d)(UEY0VUNMTCmstLf5}}S9_fl7Trzf
z6!k&6-|6*Gr(<v3b}_q*$-AD3q@hnU`!e4h<%%{(NYuJg1Xy<XL2Ux)<<+J|?E@|X
zVzt{u5$p#VPdZc$0nh0lXCG3A;6}p-Cs>+_2;uj-k=Bi1ILSORA0Zbpnr2_5bwk7t
z27ZyKzoyk%a!qWcsjh&wYA4ybnJ;(fAy=3#aGqir?qZpVC6jxX@u_dmH|YF1La@KD
zJ<&<__wNg=4Uw<v0G%#7!7S=rtK1jTlIJ@^MroWXq3AVDK|FrLAx*u>u4TvNd-?ha
zHAMwYT^dYvfF6m~bV^P5T1VyMu@%L|jEKsq+NNM>D8@(a1_$dEvaNb#98u}7dys4k
zRH$I6Yf-Pw){gJPmN477(PEmIo=xVyln}c&;<Gc7YUf#EJ|;S(#tU2O%Gs#If)Iyb
zYkjABnrEY2j;*gyPy46NRsZ+yN^a*@x(PUF1@EfQ67MdHQkfP<8=C*FZ~eGw#BQp*
zW@lL(P1Kjy8Q94!{#D(hhE%q+)AW@K^jt;z)+XT%ntV-9j$h&9%xX2gmQ{Bi(=uJ~
zf6+_k&uSk67f()$OYsGXz4ISV`sQ&<W;+p1Q%;hK&s@K*%Za3#=jk9~OKj!Pva8e`
zH?`G#XvXFRl4Tvm%*Uuw0g67d`1{(oh*ZAzkA?`J`8I??Mo0K)9Xi#0UXHls2G~sY
z@Jn+NV^w9RR`|#F{YX9c%4rx-5%_V-<wAZr&0YG!#x=D`My=w}aT1j@JYr}+qZWIT
zl#JHFjmK|K9yxv>$_zWDmuMyfF|4AQq55X8QsP4%4QjElQ+d{(ZGLRCrvPylEDKfx
z?qVQY<C8GVXOPVyZ#N@N+E|Rw@0K5rMA&1j(|vaOhcg1=XXKlb&aahl(Ab#BSk}p?
z%VrM4#@HQB(!`{5wRYA`5fNuz-S>%+NrH_-3l#9n&fbQUFCvLp7bzAYWu3Z@Ii}Zs
z+h^7ecj~>?^AIBuTu(m9T8W??f{xbtg_?8EO1NfJXCOnAdTx~8GLpLv?ST=%9GuN9
z7{n~ei-+S9R?D@$Px|pVex5?<zJDWj{Pe0SJ&ko_eiZ<CTFi?aA7OeX|4K;obz?v+
zC61AR_CKQJzk_9s05-y8ILI{GN@R9)wj-N{B<Ba+QeO*Aby8ZLs5!h=gZVa#Xk0Q!
z*8zKkbTglIy3%OKb0%IbDixs=Q)wU9c@Q>h2`xTJhJ91PoH(^oS4s@R&;5VuvA-f&
zNm5qM)uG~oaSNj0ZkwSZxYAJH>jSQCQ3Ge$CvWOx0}b6Oi6j%$nLZQreb^`!=tyX}
zcS$2;$IsdY^G%29+?vhsV9w#wY9FzWz&RwEy!}U51pn7|1RTP!glW4koc-RHM;=(`
zSj(DG!*SK7KQE8BR2)T(QvdQPgYtoiGG^WjTLUr*dN&vHuTcAsg~BVqh`cvtc>tYB
z8myaiCN5CQ`*KtZj>HXH(Q6VsSgiVVy3?&>g^jb>w(>t8Q}8k8T?BK1ni(*&-6n#<
zD4Mh@Fv&E2^@NjvA%D0JQA$CrcbA%8OzuBS;@_4L$q0D6T{HSIq<Y|6LbUwEn%{uG
zJytEO`mPAZcd`ZHNdH6g{jC7e-XW}sXFXJz<NK({H|0!>7f?4*1#0MYzq5zq^uuO6
zQ@ZK>ofi5}c~Qb(s<6PF&1l%@*R>>07Dli>si89P5|J;X`5!06rKq#9D?VQ#tXHjA
z&J^c-azqo}3RX{&vP<lc3&T8!z!Xmy@bM=;!jnW6|31&|-)1*cB56$Yw(KUalUmYC
z8=11KQ^&v763&VUKr3&F+_!}Q=&h?dIqo0>+nE>4SrKc%c?hv?j`*`P_(gDCx*?1z
z4<VMq`SRMv=kJMfR@lb1b+@)65cXGoeXTYRe`5GGiNhV-(=yl@nI6gwYWagsWniqL
zK<hJ@=~1CXuCal_lh$gVl<ydd^$sO@Rdlz=Wr27ja(bexidh6Bs*tC4F3LJLLD)Xm
zP@4yWo$*F@AF}IN2Ak;}+wg&`=bOqp3?7hRpmz^11?K`-@G4+iJCiWx+yoPZ)Rqjm
zW%&bj=5@SrfwDiS@^Wza$O#4Gy`{;AFl-FX)p?b*w@5=^U)n7j6Uz+X&e)Jin+weV
zU=%;94=_I<ATaYMqp}P;ql&}adLq9yKc_TuHFe`Dx~EKp#soVR6Smtr+^{OiAZQc>
z$!a&^bJE4vBOJAtVdU5t>ECO$e=>rp(!|&iRQaxe!8RY027Utk8$O~){jdeWEo0=y
zH#1$#M+<{+_(|~=bO=Psc~5stUAE$VZhb}fUi{nF0QjL(2%HfF;+KVZgs_7zYbhp=
z1{1OL@6&JMu&f2srP~&omhrYo?#B1*yc4osVvfgDjyAcDwP=Q}8G5PU1fD2^x|g4V
zuH7sKfDie9f>Hm8>B{-TRtYS)YL>8>rMhT#GS*wgMN9mu&%z=0FhH;H-QR#746xJ$
zDL~jU?=~TF!wP^wD6~^k?TE$TBuSQ(`iK|=tRDD0d3HvVkc`0P;<NnvC%@}e;LnRT
zFl8l0n|nu&4(3$6BcWvq)9(9o3UhT~F=y-57}p!{UJZ>CR042fV~H@6AG74lY~)MV
z7nHQBu4$J_8hcE?V8S?4%0mqPbJfWIzH0xTO$D}uwubs4Ek>}2ppsY;OV~M80zx%%
z-rM4w4@^#({@W{+s?Rgz<619|Fpf=-^9=}v{O(%Uhmvrg<G0iN_WtR?b0_L?^DX7z
z3wi`~`>k;q@7_0^MN5-va^ZKzFcg}UWtYnv1|e5spg8-1tODUH+zo56UqYUjZij|2
zt%Dio7GI?SLyvlX@zp@)xQiEyLKh%jVOYodVtyIJX`jDqFnX`_z<&^MTEfb5(y1eh
z&YazR6VoQnNp;7-l=0DK8TvtN{><WgJAh}#+n=$956}?siakm1p!It5E9n7xPd>yu
zTRBFlzc2VG!_$a0!A>(4<go1P9hP(JcpD2GdvD=d+r3?bA(;SiiD%p+->?<GeoYw*
z+}c=Qzh95?2NQ-hX<iq`_dOpVaq^}&*x!C!N3L~V{KSx;E~F5F{fB^U!ttSgY86=m
z7@?sH=JgVTtK|A^C;Kmrg<T9bL7Jt5S`-a-F9Z8M-}ygY;SofHySmzo`{1@7Otu{4
z*P(>bO=UR`{9uc_yhodTuc)^fiAs}3Ib*_Q3I^)2MMe(T1eJun^?SYf2PE@LD;*va
zl_VqqZ+6hBR3Ob^2n6?Hs7uzy`kdqj=2;!$`V}2@ZIewUmIYz@Tvk!`I!y{!5@UUE
zl9<p3rHS!NxQic6I&NEV28&{P)0XR+;aAFh)&puWR8o=5JWfc3UPvDFxzK76IsudN
zo@7}{{f`>`7aHI8^1mh=z)h)Ku|GJwJzDf*RO}%Sudy4VZmJlqlh#gls8}iM;?pK?
zNOmaGx|l7MneeM$u?Jv>1pqzr`g{zcT!-pm0PQbyOpC=2r{neWf8q&$>mN(S0xOY@
zwWCH99OX{-bf;-ETn0LjUccq*1uC^XYkkid%t8)=Kz@*Xl+u+GLnHil%_WB23d>mj
zPu{xIWRB-$uIUj7ZA??>T|Tr2*6OYilg7$&o@`LiV@b9kJ92`4`9I*;s}6clX@)(q
zZbhN(#1&n;)~DJZ>laOUw;wvs9qLmvT3a|em~>A{7xKe&qe5bNb1F4mL5TjC=}AI4
z#p`|#xC$4|LB(B;kw@9yde&<noh%hE2Q7H~1@EF&8-8kQ;RlKee|?`2ndCY0^Q~JL
z!0LK}6U7f3;CC)Z(4eG8jXPV*`HLgvc_u@w6TM!{$~@J2LOciQ{<Sh*G5zBytQ(vx
zVlFouTuB}hY^u0ZqA^wDn#a?FXUne#$7B>fyc;dme~G@<-c5N0FmBzJ3+>s~*QJzV
zZ<Pl-+h$OzD&0((tFu$tMFbMQphgcuN4yv+0E~@M(NUKEI+IwcGTbmDRccxOpbuo*
z>2X0rY1|O8JFKDf)1$xbDyVh%*IB>~m%`J<#m7VThPQIl^*Q>Xz0)*O5S4Br`K@iV
z+dz|T#K59!HtLO`6&#@q8!LQ8HJ5UHMXe$qYky6a78&FWU|ifb+gLkumGP7IFGkkV
z3+b8Wocg2A`dQD-({_J#hhTCa1v$IiY@@HC48<&1m&U%?Tgj9`X~hr44plF+ye(lQ
zNpZL_$mr$E2bi<@;y5-89W3^8XJ0EQT_JWH^tljPCEIeO=pvI*L0?Q}DhGi4$g}G=
z50#JCew*_B22+-TPj?%hrQl7W_(OnVRKRtu6M`n*G$I%~Bn0?l+1%dF?4c~UcU+?s
z>lJ<&^Y|S}G@fl`*93>UTsb75IKIk2hPLwNYTYJ&EwWs1aj+Y2!YJ{Uu@7!#-}6d~
zIrInC5`P<`XQ>9HVdA=a<>%gHE=URf9F(1Dge7*`{*~Qvn*3XHY|EJ9J)M?8dJ1pU
zNft0Xi7$~alTr@(czby2wr1Q6&>0>>OejkGH>1!F2|3%zQrk~a*b{jKyBaQvpp>f?
zk}(_-RC16jpwy&jAqw>vJK3o5p0;h;N|}Gu{x*)yJ-hb(K9@8Lbs9UezxClgcD!8q
zPZ$_)33`#;?C6$)ZKJPE-xoifU0dSVS)&uo#et;{vU-P=P~T%s<n2bR-Jc|68;U?J
zV~niNTtjq-we&zctSmVQ3atGzcJ+l~O6PbvdoVc=7|-t2X$DB*N8x(hmH0^oDEpp6
zHh-RpAPEW}>?RWRpL~Y%RKe<>nc7l4^&PfC&&@$Zwt87T9G#U1Gt}AtwAR~EdVgo>
zeB`nIR;-wU6!#k%v(?T?c!R)PJ>QnhJuWd%`GGRr7|0XK$&uBj=NsS^aleGN-D{hQ
z^_!uwI=U9Slz$)+OeGPVSYQ%AzHaJe-iTTomgb+qQ#^Y{s!;`z+6f6n))j)bB3s@2
z*t~Pj#s4~M*+s@``Cu{yqO$G*O08_&OXYVfMm8k;?g#B4y<CL_nMv@^PgNvPFd3C;
zgha$Nj(_w-K(hBFiM}DaP$E+#2Sv*gvkmK`tTd9Y#8uRhpO&Ujx&Q;kyPD6hQEOJo
z&&ZH4f}k3cAXO_!f0{}fTUk1q@$)u$^nsi#7)cDY7nOtp4D^%!C-V1=qV6B%Qn944
zj4b?1+6KU&)VoeDgTpq%l4=zj#D<JhBSL*(;R4a^BkP*t3Py^)dmDJt+EH3&ScP*r
z2u-n#$D;ZSlzH@LQ^fM-PvZ>!@ead_J&?XukK<%W4{5M|GeX|**Ay{dczwNHi*I)4
zCd;imX=5He5VcJ+S5zxD>BCjbEQ+v1puEklFDi|a59Nv3A*fctS_JK>jKg)W_f*(b
zk{JW2jw3dZ(qA6@K=0vRk2Onp8QQAXM|NDsW&Y7)gtSDrL?0c1X_hD?E13@0!TXZZ
z&xHrk#`eA1vvhHVq0I|pW9yb?YsowBL19pArO`K$sq5-bO8fnYpmAWmMs^W&BMMm$
z{QG?hq?d5LWS|lcsQP5uB}5zDjjB69Pev{8>II3Wu|yZtzB`Ac&9~Rg#SBzG&l1jX
zc&DuzUB1G`WV=(pnkVVlH9OcOLGfnRA;=Q}?or<J@XJ;?AKoR&C*4oLXxRun>E`l5
z4SCL{2H}PAUX4#~$X2a5SYB^FqZhUkKua1gx(g02<C93g%Y;piM);24d8Q;}o?QSk
z22dOh@`GxFS{(6)kt^F>3_K`KU1TxO`^X}2-eOEK2%4$#Z(8OKMXdZ37u<su8usH0
z*5Y(LGisXw6viWIpLMk8t$<Q9d6=EU+Nl({8+p7&N}8%zu6y{2a&xF27y23aG@Qnf
z9blcR%c}P=vIl)Pl{O4#ATjnpo9}2d4>&A#VCp6;&H`RbK;ui1s@>Sg8|(CxB%g_W
zVTJ3e&Sfq-9@ZGHa~zu1WJv0z-a;DoeX)Lv>D8A}kKi6op`-Q;)WcX;!$Af{`P}K#
z8Ws6HFeMl-71#4jimA4pZ2z<GhTP6||H!7)dt42-2cm~R{D(Rd%PsrblVj5kSp}jv
z-rpdbivCQQxJ&M^>g&xg)E+#ZlsIPR;vL|V5@>kE<rT&Dbf1Q?r5!jIo2y6r9Se-I
zN?*6!lBpYjNg^Sui^qE$%L+m16eBba-{YPk5eG&k&V|@N6=FToZDbDotDPxEc80a~
zbNT~a6TxZyF0xA5Y1y7#?cECYNY^O3AUCb*F?YTdg<w`s=fxVRpy3R97}};s>nSV0
zVh59_QgIE$oDP+IQJamrGYcKbgQ|KZGC}nI`e<ex&LnX}m^ZO8$A)vX?)O~n?s;3c
zzGM9?Anaik{w*#`V^3_4|6}W`bG--EQC*i<?~TQ<LcQ4p$GU2o)7AQN*IWHjv_rJ5
zinbkcf*ggF{w7|BWqCdQnWv1{X#ugrgxhn&aKp<lhE1TsA)5y%$2|bALz_z+G5sBY
zvLj0^no$pwXIIi}gbd0L-{FWaKN)dgf!P;gHpQ?~u(_C(|GvYu6)38axQJ!mn{#DD
z{vEIh0tVi+W?tL*E!Y}0Q7<Betf84gObIL2+a5;R8k8-x>K&H~^*1#m!6%2S;aNO4
zA-H57c-(}M{BM3djK}Ri4H&*{q@?)rwqkc4%Fy{b<DMN2y<}Sp8LAl}k7%`zTj>oc
z3`L3&tE$j{Bdu1E&*Yz)#y1uAb)l%>IyeZS3Et}LYF7`b)15=fs^a4#34jZ}Lp`VD
z$Gew;^JXlU_)Idec`qiXZ}g%ui04dJ@{7Jz+4lCF&X|h;9FZ%e?qGJ1wSl$r8kR5d
z-g0ooezs29=Y1t}we`#E;LL|{A<osjR^cM0FY<7)?Y71~C_o4I_LhSg?^UO_eW@*X
z5*bD5`=f*K+cfji56xlumY#-iiX15^T6`1y!q-~Ey74&hJH2KBa3V7xkC#Y~8jTUb
zu<*Sj{_Cm}h{MR-^LJzRCDPW0>+t-sr_80ouGoM6*qC_T8k|a(;pFf{TVIoBBKs#F
z7iIfmQVbp5x!J0aI#t3G5F8xKiY*H0z@mi2abLmcKpu&A$+9Fd(@$7nRduO84bRRp
zg|pf>(la+Kv|UP0?tdE_qppN4MXZm~BifS0jlEJG$ey@Q3!-U$tw93rFPH`fQq@MA
zYFB4L&ToxH%-xQVpbM<Q4)+d)15#fsyJqh)(1fHpQW@W>FaHd+Wc<x%@Y0RN?Jv(p
z?ZtE#1*G0~Nl!Q68?&%za$g@&`q-nf_GhIh)pO>fXC!f0P+x;P3ffswX6(WhcHD*J
z%!q-Ca`*I$=Gd#)ZCI&R5n(^JAm*k{%9A}bX(qFyEgn^#O?7(joQU_G1Hlni<2sMV
zLwS#`M(~u|4<85k{yNyJ7`E%hh<A(US|3witBYSdJZW<OXb_V`;$BgaU>1In(>vev
zB7W^ze7^bY56I2k0dlNZQ3h)S?{|-a5q1uCLTb2uz=!vkH`Z3a!JF%2{29H!rC2Aw
za1)Q6r^;}6PS-p?5PKIz8#VLa()Oe@0zv#{gs0S!c9*U(vR|3hOOMSG({bBhhO^EO
z`EHFdo!|vUJPaK(d}*r?4`mKU1_?h_8w|>KkfB=aFHQJvkEIwO{g4B#4RcX;{c^#n
z3jv%7!7h)YB>9ymd_nUIX%4JrfkBzWW~qH%4s|?QrA4Jk5lb3(N6<{#hB1*EVDkAd
z^vh-^5t8hL1^DPhmUuUhbfXn5DS7CY6I3c=OYv;KppsI@D^xqR9ygzu*+1*YYdG*7
z#bz81zyB>2ZW8LI@D_u>6o(>oOUEh**=o|5qFdeI3!g)B!Bihe<)Mm%igGU`l-p(u
z)_Cn2sz;3oH(YQk>C_S7#0Ul(YnG*0m-G{XMEtK>uU+w3e~4N$4F>(2mwR$k*=w2~
zNc6tH7%83O=Y<?yRGSySC{K6Wp6#XllD0|CVNp8ryY<s9hYZbIh<Qc69(GCE`f@wJ
zkNk<I)S`q23}zToOxkFqzQriiwq&n|lr0DsJ^Pktu_Qo&plF>GPYlCyf4N=%w!m@i
zwLl?lsx%~Ut-1Pt->OlZbg{PWcUU{gv>nu9Oe%^bu{Vth8LjlPcl9Sl&K{?wR7|KV
z8g>|O>m)W>Tbpl^hvza-Dh>bYfHWV^EfaOq%9JvyehZ;-m$A7dlW5?eVZGOjXTa4b
zd(uJkJOS25Nk-b+>m<fJ!`Q1>ZhC2<J}yKixLZZY9{WuEAeQzyyXZq!+{}cZvS!1h
zF{2_6gMQy*IA5Ae^g53|x7jb$P9;eilr>$arL@i}6QNsc$iOIzHTmX6>p00K*aAue
z(7IAWfNb$=JnB-|7qy3XrTd@8{=k5Kh{@m;f|8P(!#2?p7*va`&BYHi+1`LUfk@c%
zFqxCLuihxhiPyxJ%BxPc=o`^&iINr4|6%Mb!?KFHZcTT0w;<i!-4fE>-6h@9jgk)?
z(%l`>@X+1TT_PZLw(s|T*LBXn^UoiOy7$^^%{9k8#vF7<C?}8@WBU-waR$|hac0Cf
z6w`;=*J64=i(UAW!K2E%1;$*s>7fo%2;c`sG0es<96!d}AIHI3!?+0rs%RusYEaMq
zo@P@Ppkr99N-`P)XB&va=-cDJGvLmm=fK(P;G!3(@K}@Vl2y*LfBWtPb2^?>U{(?{
zc9D3m1fK=t&wZxNoq!pE+bqqJJ3O=XXPO}ZRc}E&F!wXuz~WcGFv_wmE<nT7`su^T
zbO+p=)6hN`0uxmQBW&7_B<~LSF4uu8kCVoHz<@X;U09>b%(N$pM;tp!^9Pb~r6%&|
z_+@`Bo++d(*Dc_<n_fi!`Ksd$){s4j1-dQ%YAO$?X}~W+OPa_NivG2?t!AaYcFwOR
z62{{$50uacd1w^{A*vN1)JaFKQbo9tkR^p%p?sCsGpV{O@zdsySQRGN{SL~9hxS!a
zW5)*qtWg+kbZhb_H@;J`LRU>m6MS4J>T7%m6k#!jnyi1Vd;QkLg3tH?>5x01X5grM
z=7_Er#FnT0&k~BtHXKOw0f$X=-g2p1&$PYB3KaMB`mGBUgO{*3a&`!!>;EFs`HdDa
zoJ9VY1)yma+bGj@@tQZxNq)DFJ%`_?qA)eju$sk^RuW0OF;;3v0ePM*W8%*DU`w>F
z#oI-p&UfwI7;)Hdym+`A_RxCBV`i}jDF@MfZ}D+x(CaL?isL~ie9b#f%^l5({&Awz
zcJ&kp;pvy;Y}H}`2R=?2XVFN7IPH8_Qp%$nF$ZXmLe!w3xDc;!c^;)kpb@c;BZg0z
z%bUQNZL-eYi(WwG*|c-cMoI2=)dag!<eoUC<?nYMA5t2#Rb0TS6i9(gorxa>M(MZ)
z*7@C-NEwRW#<?hAblvKeCE_S2CJ<7KecRv8Gaw1x?q&vcOzy+EEsC^mO9seIR=af=
zmjRv3!)@FhWN3p0C?nF9-l9K2&c4dOdrRMr*?~R$jUfFGrqRC>v-W;w)*I-W0v%`o
z3Jx)3a_(R4giyb#;b-Atx=n76EP*63gRwRYT4A+M1bx5Mglw;2{;;gFL1*mA#Jr>V
z&&A0Z64b&(?+CdmkRbaA+4m2MKNy3R1wj87xXFOdr(z*g-qp$}#nEohm{I1>YHFu_
zgt}{jYD**{jN~<fx%7OBkX4NZS>Jz||ML`&o-w9coL|wEP*ISJJ|jH$hHS6OjI426
z&JWBb<aOKhf#&sr9RMgl?+g5n|BN`Xqfu)qSsYXJr_lkCFJR1Ic@%OQ4N8>tMn@o0
zsYo=_g^*9sZ70LE4zrG3*JH%Uac<L8w;EMa;{EP{(oJ(6?gX*4y<53v13;kmVFoY*
zV6Cf4<#)ck-<7H28;_!l0mCJh(x#1=dzm&(NF10rOcF<4fjjLw&w21?#|}j267xwO
zJQ|a^0OE=<A<^vRw}cn|$Xzx;7D3FIaTyn{Q>hq#XkNG+TaPNu8cj^Z0Y*8XpD@V-
zW8GW5W&sLrQTvjxdH+S%)(KcT3!Y-I-LnueN6G6^_LoNDsg#<cfxOEg-V$uZr{CQ3
zky3g4^Es0LV04Ta^8UyrdoynqCyL9F0rye?hW{SO9nN*Y$&ANT^6EkYAlex0u`HK8
zg;8Bgg;D}94cRq4+-hm$e2iBR8JG^6qGw62-!<Vf%-$MB!J<^!IEl&dm{z);D=dxr
zj^SEw3q>HrJ^ylD#IYmFdpUfu7Qm<WT^zz0B@bA|=yzxnZPh=NdYU88h7_9>1HckN
z`a2@eCXWB2w$_iqwmwq^{kqw=u&9&n&K?uOosTwuYu|%6oTh*2I6M>B#r#6}<orKX
z*LO&H9n92%G$j({6@a+=LXC4RR=VTxVSpdsr1N=qln7~NxJ$2U@&pa+U_+a#ZXBB=
z5z6VTe|wX93p62>q7F12iHfgU{=p<rbedO@P8^82oBf%j#Nz3|$ET&Ef4dih(!K(i
zGV2HBIsWb;&fUG$G-U05|1q6u?pI-vzPb^p->h3YNI3GEVGoQec#ioV1Kh@gT_1aU
zIHOA|7ps{*Ongg%9zAOzEBFCp?NIeLcCC3Rwhl$QQYFFzkWgs1oE>*d%({8hE;4;%
zT^>p?QzUNTKf?XGAsI3ir&{9eTQb<T--1rV>Pz`I+}3|nvbT<(1pY4|!I4<I{PWsi
zS!^Tm>yeVDM$f;FKmNVPw#h&EMn~a!mk&@cDa`N;b>LSOClbbX?&&7e4v>#hEd#I)
zwu3dXAzKl^4pGH|e10%V6_-(lDgKv*8+z2Awj2%T#9AJ%gYfXZZ9eIN>q|?Jv$`xi
zvSwZXT!RLO4=npLFY<svMtyCkgYzaNz*5X}WkQ+q$9PrV$~C*K?1r=kT*CIM6~RK0
z3<Vyw%wNr!;R;AzlOr7&^0%qN<2!f~BAGwjn?=MFhG+`8bi)IM9WDrSuFmb(<@Q6L
zr51*1;E)nQ=~voIAd6YV>}x&cyOLs=O0fBEG$UW*NZYT9tT{yfsg24y%O5L4vG@mr
zN20uS7gjWXTOEP>=P>55sxqw}ZI>&m3Qay$*)7m>+_(W?sgf&dUShGIK)Gm!jLQ{$
zJpr^5c2fZ9iuyh|jEN4QUcwqWK32wM-QDfmLNhs*m7-2`U9|_Ez@i)<Qv1{J?fJ4f
zlR$nTmdsN6)WGbw-;aNl!Y{|yZN3La!BuyM278HrjwmZVLlqnE`qCm$@%qYvW1=Gz
z(2nAJA@VtTAsz1%dUVy{MBRN(naJc>UaB5oV5c|ySX(7~IqqV&@Xd$qRg*OHPAJ}A
zXSYrSP&V94Z~l&1Ay0=};e-g<@fl)6^C}+QlSDnUAC>1*gFgYf)hc~)UgUrEur89Y
zP)^pb;|Es$s=BMKp&-d<?+D(~VxuJ4Q^*L)@Dw)t>9GZXwA(cFNO)~JYeKQZVAZ^)
zSpD_f4F_rtWjqJ`U~cE<us%fm(+PX;50DiHa3CPVqKp84qo4Ek-g)d+8;9&8kp2K^
zJw|XBUSoL>u&<b%@M0|rQHPlofQGSBtzdc*f}wt9Gf|?Hu)>VNYj?RbDZzm>2aut(
z?Cx4SaqE~VnENx1QZ80O*0IntL(;tXcOkQ5hih2l>0ICW8|^1mh5?J0MxVtC`~yTC
zm94NTQP9YX;WmXslU<I&TvLE~Dlkkp{A;N`G8m7lU!+nOEAD%Fh6~|7f7g4N{~Z9$
z31b~Ei&F)(pzNUd%rVjIS$nMF%0Rt=f;qJoEWGWJVvsx}y&1X9W}!m)kikq*g^lrO
z!hyq2jkAzmFQbTr_cz|De4S_}77P*}Gw<To-KM%+L2B9V)}NJgxm2c_D@tEM!A=XZ
zd&&i0$v*xSi=<jl{MaX!4ic@n{5mK}RhvoP!u<6m+V-o;N=p$HyRpb*-#r~@O}Xvu
z^@kySx%<h1%s<cHE6;}#ubUUE3~y@+drTxzgaqT|it&OhXyb$z-*b?Ml#VEJ=S;z*
zmt=`VDZwNTPN|QH*V;se?2eKZB+*;Wn(qrpXtgY3feDXVlw2fo=H246)FE{-+Q(>6
z=9ir-sWN@8kjD2OuAewu^{k%_n&01gMG0h2nLie%XzqC?v3{<Rv9#bB2yl07Z=JC0
zWap)c?Y_RkeUH%ixg9JJZ=dC#?r%D1&t|;;h;%6#faW6*$Y+2(sFtppNDtHq1-YqY
z+wVY)aMxS9R`mX_q6kukUMd0{8}iv(H_c^EWAG@n2iE4HLA3ip@G}z;#(&;#@+Vi_
z1vcXW-}gCAZV%vMWH&jV(v;_bF^L5??dB)|2Sg1^2M3v~Hyxuh$HII>V8_FBA(RJb
z@r`!M^$+!ilm4~pZwwN<s@zprbk*Np4q?dg0S`HWeSBlbNkyL%D60A0(%HWdOG>w2
zc!viAvle`jf5xwc7)Dy`%kC9FZ{|SeFd$cE2K+fyy;J=S=pPv|##KrDmv`^D@FrwU
z*a-3aVMxS`0$4wrPNDEMJGt<2O^Mu#ZYZPB)%{v7?%ITEo-LXKNzm_XR_mmk^Z`7o
zeYXXT7A3WY3yL~|y5R%F@CPkM>tU<@Oi#a?e`M6+zLmS<`QF`fZ!)9WYwDhvJ|M%&
zPrm;{s3}OSs}9HPoP}Bf`X&(h%=4Jw*lsFqO9g0&E3JozhqAid!h@N3#4uzC#$+hQ
zFmyV=2qPJi;Et2?M_uvMJyU?_gdJpRL|WXJJUjWunW;5E!&hW<`JKa&5_*{-8-<fu
zzr2Y)YsVfrGEz$7Z^Xw>9M)fAlvdKH5bJ%Ey?qedvh@Oc>8ds={cZc~{r1#s3y$4=
z(*`}+6J4KgeT~pr<&KFC@7t$Vx_ej>Rh~H=`zEGhvVU>#&{-`O2rB1!;Llt7pHIS;
zi*hjMk-w99z;$)qt{XPgX&2%!CTSmD2{UV6xBg&wGrycl^)BbAl=Vqfx%1oaLa~em
zQa|mnWBc18oErO(42OUeku#0U$X{##oX-|CdUEKvR8UVZv$y6g{DKyw;@!P}f3;H3
zWKjB|KX}GvEQ2C>LjcA15FqIz@_4qg>!SR7*?_7%4>%j8Q$z&Pxd2)GB=i4vSm|El
z#TLW@7Chy`Am_C^glLlB$ea#puOd}3$i(N_b}a~)31eJlLf^qd`frhWtK0Lq>a9|c
zpL6DoO+~kCu*My!?q%W=%B%Ex{GVvh8SEO^w+RajXH#yl@QoiEHZwHdQG%2Fp!_;1
zkx`3t1@mlm&~sc-R=bW^TYr#q(%D~L7(CX=rfUTTkC}adycXI=Rub^v5DMm4CM3Sg
zDjB$!PA4WtrebR)CM6icjv*R7aWwyjeI@0(Bs|R}^p)_Eum~?-YZ?DVzL)$JuZc7U
zgs~`rrF&@V;uFexq5pP8LNSOIWL=dJD@;t6{aPnt{gB)HO+T4({{Z445#r51{8cue
z0%>qPlkQdkN5gb)8v4fF`sD==sRHm7qqBavC8ArbMX2=0qCED9xbb~fxcj9}h-NI%
zD5CpifG=qExa%}TVGtj}w?974FJtFCT1NVS%%IjeJ5UwxToKvWe6LMs<-n`U!(_X=
za)eUFq6-H;$}HmrSutUJxY&JEUl*96NzgFOxT90_{6$f{f*)@^ojB#d9LMy=OYq^&
zgiPWwXwVgI;Jd@7#h<TJ{#_2`80mn2@^*BpEJ=xX6{(A<AdR)~mh;X~Pn-QcVRms6
zpO54@>P(UNi`x&x+}$qLBlAyx;(_uqKyQ0RqDE#HZ@mlB+P|Ql$n+21<viuf1-v<G
zS?jXkI{DyWrgN(4Uk`#jAy<WA)Le&}JdYg?snqo<ldqQNS4cN*4S~BIj%BscyA_IM
zBe><j#<#>hmrx5(Nm^VSN5Cyu{0z|Y{qZMoL0_*v7Fk;)dl$*faATn>={9Q_W@WPt
zn{Xvqf80R!P+2U~=9Y~_X>GUQenfQXVizTrbPcebL`-+%7z&U2TlDoW<qhj-|0CZ<
z)s(7|1#3)9-Rz0odbB$}N-<@d=^k>JwiZb$=JIzQX(buRwE8-E;ow@MGLj9bOC3D=
z@<-3C#+Xm5qJy6Lgr6#14JR@-lEMwLA<8bp_w1-P8mF20sbY8Q3bVcNC=7qZ)MF)L
z4R1nz8{awTPbbB;Ack165-U5~88xHkdv*pgtJo6?e%oGh_89C0wz!J493vq&REhd(
zb!f0K$p?Q1qLg@EjS}CMc83(OHioJvhO!Lj-8Av!+<i?qww#UvMI6Xg<+nm7o+QPj
z&!lMcEo?+chhV;|K)wZ_@N!kM&9^J*JP=CU`ay5qyiZ!nT#vXc;A09r!v;PG4e*nH
zrA{IWzx|-l6vAngdJ^-Yuj7A2Rm{eHAaO*yo9sC4<U<I{{;lQ&lDlxj%~x1PG(V(=
z+}kN7%KTPah_>#pwk@Mbn>PRCAE%BpUmT5_i>$Phe97P9!0V*U@>(J1!MYk5F!F#$
zQ|-@$#%D%~{0|OS^Y3-WqJIK<kx36p`bzr8xPSl%uoj2S`pTYDZz>eV+3<8aE;5)a
zMKj&Qs(Qvmq|V0OPt_Z*>FXSxj|WZfs-iR;U(Ctd1{}b3y}ZmU=zWw{#FTTQkN}>q
zg8&T13v$SS8}x<##w>m*EnuEmdoypWLNKBM)JSZn6^f>5?_zCK?mTbMuf5ShoOA~|
z;pAHXHK${47<okB4?Q}#;zrnV7^KeF2#IYF?l+wCiA?!$w-ZZzQd1h@>1fnpDy5^q
z0S0@jZ5#jrRRh;P5q;bGtx=G!B<PR}HJuD&Lg6wTK^e?TZWnFcx4bsLro22FSC<d~
zJ&d<e+8pI2SSCB6gsOIx-ly3I(e|tATo7~=`+^5w1x@c}Me#=Li&c%!L55-~ZfYvz
zpBh3S?x{(TEwZ)lA~+Ks)doc~md9oI#lo|;#_^UJt54rMmy}OY{P<vJIl|hdydlQL
zFnn~~-@3+r^!num7P<IHW8b<ezYFrKu>(7SzF4VIqrhCfcD8B`6ePbB@WvoBHDEj{
z9?UW8qQo;o|HtEc{g2-gQ6%U>1AsLZd*PJE|GcVda`y`M#PpU)+%^O`_qUfPlCM!#
zKdj5hu?Uy=5cvDysKj42Xez8(Mmo~5E;xO<fuI~9KMXO?V${;Ok2t$F)_r58woqyC
z4MiD{8ev2|ngCH(9ZBKJ*0%i|ZYiGEG-=yGfd194<m*vKi)paDN1ZD<@n88e*)~}h
zo;GN=oDVqP{*EHb<a>CYK&ca9Bo=^#F1ORjxJ}bN+S_>#o6a@6>&8fzcCoyRB;yj;
z@Pa8F%==*^+K;ku?(763g4SeK9$3ZXi6h3g8_UF>e@Zl<)FtnxS3`oC2(sZamI!z2
z7b8jz28z$Fh{G+G7}B=;x<(F3<-ergz)p{XONMG>y^eJ9MG|FPmU?9Ev1R1pSCqea
zMyyx5wInnGiraW-`NV#p2C5Z;$l!2qpr3}X^kLP~k<h0%+(zxap<#tRgdR*-EeG?O
zA%MoQ7`-4UYeuu`E$E?zbFcX%khYb`Z`&-vs|9OaFmIH2-VO8xyOtUD^#SYm6V@>V
zNn!9-8NWq=M@G9A#R7VCxa_;rX9a}v-0p9kajCYDKb9U`0@_14&vMP`u{-LHGtAJ~
zCCDPkur!7!HRb?8gmQ_rh65R#;ePv9@o9PsR)@;uFFeQ!##@DYo3Ijoq%UOgUmntC
zW@ysSLbnfMI@sYU4c$PX&c@d+D;Vo(6Y`E)gnnA1@yKDv<B!kS2CgJ3BS>G1CBN!G
z_o*jSS7U8BcqG=F-RM^<WT?(KrOq=LVu4<XcjBZiHUP|N9r(xT|J>>4FN^3DuI8X0
znYw9=hhTcSE2M_|Q_lxY>|S*ba_b8RH9$ZhFR<l-rtYm&IM#VbE@}<3+xbI^a6$C~
zS9M=xiN`Me8)^Z}SRt(-i;3lPu@0u~cG34$>-$j8TTTYrPCU+=V$n*$#%sh_Xs#=d
z@8!9ZBuOpx-Y5kd=FTXSQ@39)M1aR0QO!n4ma&!i)f<w;%^P47bjv=HmjcvA4n#5>
zn4r|1$XWVn*f^VEM+Nq`%v7G8o$+p&7@rUgSe$@1M;oQt0L^8QhkYqrvVozV&P7JG
z1t&iammI!Ug=ZbBk6lHNv8^B@(VoW5tD4_`DVU1jo@1ZST+WIia4cy$U{rD#3R0A!
zDtYGIegE3*k@nfb&3}MetGJjkzI~K{YTOkQgbQbq72wXFYJWkB!_F6lG_t9B9{fGV
zeCOO1)`!V}4c;_fY0emZ7o;qEh4_)L5%Cp}3t4!0kO`igwe$Z>T;M9fDoY4P3^mmC
z(?KM7WGTZ7H92l3yAM~(Z0fKTjF%wg6bRG;Pgc!Y-=|&vS*F4ItVgMzFsJJuKJr^k
zEoHo!A*001#u^~y+@|i8ISe~+8<$4`2wnB5J*DY#Z9s#_%eirwISFtfOh~eBl2&}C
zpcp}2;7;D1r*Bj7gx!G56btwIx6O3a1?B>VcTwbuO;{;ln}JN`X3LSm;-luuUTy?+
z1@k@wxh4pyXPvZ}rJ?>G*S~`ps%Y^p<DLIzu3P<1`^>M<(FOep7Eio5)l8USX!Q>}
zS)kLf<cw+?e{Pt2-}=Sl%n&@aR_^oDH9t+lCmzk^m(8vmR>_KEwy{g`i!A|Ziw`N0
z&l9l8oAGN_`=Qa%;!w^)1~#8zL1kk3WLWNnu#&`tS~}#2Z>xO;F5UdpWylop5Jhi@
z@OaqE+li-1^DH4G15C@;3bf61Wxu#B6<XcYQJ3pDkB>D@rD@KmsWmY}ZDOJ8f^cZl
zavnR`5M1Jxc_Nq^k|2Vnwb}O6xPpN>Gyz{EGk@3D>p&tytbiQCm8yovFqB*7kNu%G
ztVto>x8Rh_FY8Q!o10%7c;zwl=>GeNQs~?C8lBvUgiriL?8U<y?m<8wW5=u$RF}EU
z2X~P07w*3gW`fHk*5}SZqNHxqdmLhUlcQ<vZ3@D*oUj9NoU2E<!AD_h5cA4TPsE=q
zg!dY@bbVsJH;L5-C~LaJJdWKsz)Xs8fbr_(Fu;1qXI@t72>7BHds((^BsLVa<*s!|
zO#bWqJebwK;&!V4OsBqox14-+KAT-)P{Khr()wS2b5m#gcP<AX%TjlOScPmRg&Bav
z3e2|V^Vq+#r^=}ocFi&2_V9}*h`D;7m`H@R=%~WjK%qQj+TXqC{%i&9+Wq6#eatf2
zSrY2#PUA6G=X1SDqHH35?!WH&wCAjvCBP{I-Sa3QM$NyjEb`Rc@y4Y;GS^yFc`D+n
zSoGsgh&_G^$F_j*#V!qq7+ao`o*P?sdT2e+nTY(HXG3k);V)uEQFla)J1|G`1>_W&
zo=$pi*C{#}aC7Hqiv}Ygpk&GWvV0%;+JKjU9K_>%G~rTMQzmmHz^k&i5-ud`5y!UL
zDynD2YX@WX&-X5?wpn#|L0W9}x}%Z*dj!;kuGyyv86-;cw~&QYO=(g^JC!${^%KIV
zPW9ZT>!g9LY?90N{R50O9oR_9=718L<K%Bj$jDxE$|6%M)mV>Tv)n*;YOo6m>ixoF
zLx^d9rXe+dZbKQ`scqHPBscFm+sp63E)SbHq}(?*i_b-x0gR(TJ7^YiwjZ;BZOEj>
z?2<^gaR)y-KK48Oo3Hi9$m3RXKPc-TDmlrN?=M=%43&Ua97t;~euL=;q<-gNi5VHX
z)z@j@9FRy6<`$SkyAn)+oLjv()??J5scv9GMl)D#^71YD)T_+%ll18>Yg8zHlC%wq
z6)F(8_WFG)$x1JBoEXVcXhA`L_+p;%n0Ngvb@yazOVE-BZrP8a!`H`nG##hF2I#{T
zblfGB?@azDSm+<0shNo5G8{?UqZWjlf6Te@(;6O}D<hIlNs7I%&DW3x@pFGfdC!6-
zwNx>Lm*X{Q{t#loD+|i#B64C^vkSEecMwoWH^O(Wc~TQft!K2Ewwe+;3YuQx^7Qv;
zkB#k9l(_kN?rXI<L^sepm_gs{cZEzs50vFLgj|+P>GdcQudPn$H`r!R`rujKUiea2
z(TCp|e;%a15{w9!IStG3N?Ho-lm3+4p*#Czkmfv$3vwKhB*hkK)E$9(zd!>wtT~}n
zjK^OZ47V{4_j{U321e|l(pUbpT+FCEUi%q3L!a2L_A*ge#GmDCg`VT#$@cSPSpHd=
z!}(Mffl@=8BAU;TI!L0Vo6e<i6r++xBgov%acjeeDDfHERqM{RSFN#+>1jotKgUf*
z;TwyU+vLCfsv=mF9Ba@J8X2sE%AvQ06y6jVY5$iPLFHbSI_^wU7|hoxxoOa!$;cK<
zk74c*w!FQ@VrFDl9MY(F;xk_L=OvrV@=*!f*Jq-Sn3HMjzlQ&D&w!fmMCnGCN<;#Z
zzhd!|?K@k4xA~VJsUM``_l|qmJ_~Iaj%(91T3<h0ef}K)ZA7x@q(Qy#Bh)zYsuuXT
zjXygv*bhu)=FK+O4>(4LJ}H?dTkn2khQmKIzngFILESy*>NSNm-Fn_gJnd|DogVa4
z6wvVz9U+Ng@@;rq;kPxc$%ou~kNE7#YlIcF*7FV0z@NSE+TQPq3kSR$BmI_MlrZ9p
z{Gc!yGI~0U;!dYnV*VZt8+kyg0ru?vp#X&Ca=7%oxxQhxllc(&Fn^3pC#|x0!AwY9
zY}3J9T`t|3)P^lG2QYuYoA4>G1UT%gO^Sm#-*(@yC?;t?g3&;)qzRRTn-iJe&8SYj
zAoM&)*=*hOh`t`EX?PXQ2N#HS-&n;{@-mN{E35qOa@i(?2J8KzwLGlBF>lO^WmDzZ
zvgwM<#!HBd=T)c@8aCiNcgGbuS{D!)cwD8~pKm8NiF;+cf(<I2s1kIF$Z+rlA_6fE
z!-$Dqa-CxluLU&YxG5zze^@AuIskgW72Cqt>5XyQO^w75?^ZDTShFX{vX%61_xCWL
zNSJ#puX*=P(oc{$D~q62)q7@ht81#K;BXIuz$+B|sIcd+o2&WZ*Mlj4<B;4;apr84
zu3yW7@xumw5<FXSSQu{zq}C=0&Za1n$v;VCT1}a9huz^l{<N$TH>a$ZP88Zs9Q-by
ze`K9P#nx}j2OtU=J08{6%vRr1uiYQPy<}K}&Q|G(QOir5qmIL8_Dx(Jz9$}g+oVBU
zBBPwFpw=3(AQH|-5d`pMEHQIzDXV14U*$I1kt};I{hM$zjve}da?Fpf%gn13$fZol
zern~Ma}|`&XIZ284bBFisa}nyJ(&?oyc6f}lQF_ngTo(UcwpQfJxi3GhV)CxFa&!3
z>^u+uqF&x5tF!YDwEes3bYff?C2!o1Ujbv>r6L$>zpngo<pynF#V1ce2W*hAk?`<o
zQdx&+8LGuk^<#MaLqUm)!<JwGdS4(4MjUIfsL_{=jn0m(&%%15Tl?}|_|HsYH1O_%
z2~a=x>Q8sK8u5qyxG$}R<12SZH%0Pd00}>hjX?m={ctDFALRj<a|TNWiLWt#@-PeF
zv%8Tg?pfTXSJ=e0r0gLg3HmcpWt+y5Gda&I6G~va6gv7ZehzO4C>d?Kz8FD+A8F&A
z;Rwei*{m-fYI#^GC(-+}$we%fE^=&AG=rjuQzpf*oTVJftUiSQ>59%~k;t!aD=<aC
z{-@b?cEB_7QD6IF@Cr>z-eFNJ%WSx?i}P$Nb%ZvaxBNTKC!g=&yB3uq0Iw>$Ujxd+
z?u`~S##)lj;r($~Hh-vy^I^88O$8t~B}Df7ZkKLG#kcivZG|4De!~8fnXN=zk-6$k
z(6i&X7q2&)*ZaM@Xe<5hquB1+Ek*gpt11Zezbt^X73p#*5}$6!$cVyM+DtmcFSiRG
z=gHL&{h2S%c&VLd{tQf8=|p!<hAy2(vMpEwg1rnoKkR>{#?Qsr{2mtG!D_v}dkePf
z>NVezfohrTBfO%fyW+ES<K9Y4PxgnLNbjy%P!zMgVkOKSv+L<14}{oV<<XeG-x{;~
zEkyN^>}%LNqZH-~t{RO0(`eEct?6zka7$1tGrhmsWPSVA=bMrb{i`@qc6D+wIrv4Q
zT7%*vzgXhZMjZ@#B4Qc&?#^_{>T~G%VpmGy%cB29V7URWlsaRV{AF#s_yd0rvGa_L
z>zC=FA<^RrbddC7BMo$mL#XW`$aKJV4P5(?%sf9;IYUYRMMZc#lMS4n^_>l|Xj315
z|0N5FSOlB(E@Cj0lla%DzdiciVewiC&1B=uHeOdhb(<Tkyubd)jGO@Y|Iu$_rQTa~
zU1Y4aIm7dp#1nFPll=b<wv<dNFXAJ-1Gni_N8l{aE^>w_CcqjSw@ajq<Eq4v6@=?4
zp*c3+TUXvPxQVBgw5T3rX?!|_0lE4w<XuqHRFuxwklt>rW(y6-AAY&xB769SsT<(g
z=n2|{8p%B5b)Q*Oi9D?d?%lwxsA9lyfCb!~4ny+^z!dXs%ah6PZX@J=@>^Zrl?0|!
z#>-Ly1}7_KTu}3}Z9I4lzg|g=!9BsM%0MQ98BN#G{wwiSj=+HFl`rxs5U)&Q-gwEQ
z^-LPvM4cJJ*@I){n)u-((VVrO-q!Y~<9EyAJ`)!GbpoGcLO%W2P_+Q|)(RF@%BMm%
zqF^FwqN>5E00Vw`puYH~?t*7(+ytR4+pilOzUcG{#9MLpjuWncsbE1jj-=abx_~~M
zH0#Lys4hndIl-~-ZJG<Ow}}X5>T`v+PTZj_W!YM~sa~n({uOSLz>7Bd+V=|zeM)|U
zf2qksJoO>=6Q=+;qchZ$4G*;X28=3csr`G+RR=uo8Ht3iJLlrRWn;U$cY&XUs@fdK
zPm>|!MZcvrOz|WYw1I&c%1$!J13ZD&fT~ZeXBHLvL)BV$R}Z6lG=IOq18<ENXxw8n
zTnj;v7Uc8B!@?r0I>7QC;#0qL;uHCDZUg2S4(~zRK_a0L&Vl~PVfTBREB8C^an}~_
zzi#}=9_s4Tr3zyWeDHb47x6+J@eCMgNRMZf>`Zh&!Z?twoeiy|Q3o`sI&?k_a;JS)
z91<Q`RmXAAZMEp0K_y8Q9p3*lhfn-wY`WLe<2}pwN>%@KmSuB=n5o^$2|-9)fE#dn
zf!W;WUsaui7Owea;+N5PjQIFj#7<XqUWYyTRNAoiL6LrdiI#isf8N>%<xH^lUv3zc
z_cE#pliUt$eS~U*x=j~OAhC~2$Vhm$IFy}00mVy~3GQYv|Fpbnd@5{*=<!1WQ{CwA
zg7|^&46G-8C-)D5zo&);22G{#cxBJWLfC$&2*{CqBr#N>FUL>9!DU0?MGn{XbD>Vb
z<=a~ql_<>ff_&qON@qZbcer`oQk4IUoW&b4;!-qtJ+$}u9snT$kh;?prai9xls<3E
zP2+<+)2fo8jTUh}P)-h8BgEq;qwDw35X1$9gXo#X&G1`zda^(n{f9Dt<M0hvt&Jj4
zK4F@eP-Tay^5XE4pE+;mFag6sfFj2c2u7das6}<Y))77pYo@b6Mxw;v0|%!~8Xivh
zXBP7`9+FjL67ucos~l<)hY!Gckoyv$JEL_w>S`|1h$#xV4baFgMRGdhE?x{MhfI=T
zWoqO;xmPvb!TGo9{tP!2K%e~N7u{Dp&jd0v#`Dfv7Yj$d5wB3dqnumXohW{7p@==Z
zk7vb~apw2TJo}FB?~&b*G!cYJYq4(VLf)Hj_3kVOn!>oOB6?4#+d;n5=-{QjJ_H0+
z)1D^oRxLJGy@pDp^9gp2_4FqSQOSnu((McGOLMK&Mpnmj5kWW&4i?M99S`R}u&Jns
zd$BR$D_sCR5!X)jj_%7lFp(Sfv1+jW(~wV!rg<4*>UYkO)BsEi$#tHcf>+)bRQl)*
zMcsqq$*PJ{Sevg&cMdz-R7Dk56kybg`9`QWDC_f_BfjbT)zQy_a6^UX!t|nho?mz4
zC($RmI-F}o5=bn3zAh_4Rb9DUP=4I&KGNF=>W0!Ls02L#qUVCRY4Icku5sPm#Z}Md
zfYE_+0lg7p(BPN2_!I63svkteSInInEEYzO98KQ}GHM<)$SGp_5Bc<CdeN&oM7#qU
z+F30T=r><@_qG9#Z8omLKX<TtfN)FeKLJ*&oNeXutloGO-F;ZT-OX~)gv+}#%|LMp
z-<I_z+BW)6w`0rNp1X@QdoBL@KVg>GZ*6NeV<!(C`f6u*Py}>iWrnts6rlJoj@yUk
zxQU9u54+}rR?2?Zpv%S!8$1kgC7(#b^(U<@i?Sc6uO3%<Pqg09W|HzjKlQtqN2dGR
zy;d})&qLwWdVDtE@QI#V&@ntlT^(R-rzY^Y7A0)gk6QNB4pMk|FQ@MNM`)*8(FOb;
z_asp~Z>>6u<j^MUlrQ6n8WSgp80An;zL`)^tdq6<zI;2$_i;bD@aJ^TX&3PiS`z8s
zF!%#X!Lt&x)(_Y(JBxz1@Jk&Ra$$PSUG?q~9%(>EHIZ)2$RPOVA8R!{^#bBv>62C^
zsd^PB4{QcqGxoC}V#kWkj1_)QUU^yvXELf0vT#~sh$}CTe{BqxxpXqr4yj7bSeAS^
z8=TCZ$Q<ve`sB~)M!&|>;BNaF^tIpyWfxr0et7M=o!}FuMa^0wI5}|wRTD;>&NikE
zkv_lY_AKqKV-#M;bhr!JsIKCI*)Vf^FHNs7(;MNqv%2j#*@D~sI!As@Rel-O1WM`M
zFcOE`>wh-(Za2^FzI;Qe&d3MVoG+_g{540X0AHHNKCgJbB9pAWbPlSrMkL)_A>>Sx
z@TR8u+2!_SZYF5*Xiw=g0=<yYc!v%Z9L60Lm*FdXWXNkG1kN;D`nKt~erAT+k}4eV
zOY<N^?Jt+BvF-YNY-EKSKQ`iIB<^I6u!o>34I}h&zDJ2YWC+G(d@K~?SGt73vgnSL
zh}RpVKbBG^E}boHXt#lj-8UbYubv1tRWW$8Jo@WT1!}RC)?KNMR<K@f)>OM~0_A^p
z(;I7XE<5+?xt?sU;x}KiDj1BE@Pi8<6>*Epjl;3Mrn0y>9@Z_z1q#Vza7X_|-ManI
zm;3YAX}p!gca2wlX?G{?SNxgYa9d>+;{cLE8D69ywwBTFQughi5N$Y*E@BY-wxS{%
zxPA3P{McRm5zjm*(x!Lc9zp=M_HH<qSE9;g-##H~BX0h3&{YTqKJSMY46j?4t65Q?
zH3Spy0Q=5bS57Uf`F>B$nk6@xb3!U9-(4yCrVK8EH7tjpT$2;WbD^mSSqH<<1IdDK
zLU8JsTC?B1V9*Uw*nd5|0a4BIMIq>30B~$d(luE=*z*`XdUl(={OxC$nMj-Z=PBr4
zG-3fAQYUbw&mo%II&oiF5p~+V5eL$|4I<LFp^xg$npZ<PmyT^h>V9{xL@bwkwrWe+
zbQXtk8g=HuX0#KD9kC7b5VL+Fn42CS^tin>O|C)fI6Y>liH4VQ`iw62M#m@Z?XP7T
zh=iU3;xz_eOCxTo)+}3eaA#z6ijWC>*@K;^43-TC2Hc*fFAuqSUt1g)cbGj<$Ol~3
z>i_&bqWZS$E#5!FBrzW7l0ZVSFTsw}Q&s>>lwOUz87|9GisYWhxRy@zYs5_^TpK#8
zhqc}S)4~Bn_$*}igGPnVt|Y3?&6S=|xO-I#rbxn&@2jPno++U~9C~SS4t^%Lwu_9|
zgnKv(aR=H3bc_8izRIfwEO_8a@P|3FGwcf@*uZ)`9Z=f4BKIF&#X)yUL#D*Fi8b4d
z&wfsw@Y~{ntd9Zl(l^M?m$2>Ct#vW$4dX(_9t}4K%(XvqFU!x{ylaQjd(PCPH`n(b
z8*Y6?0f)hCy|bNNg~M8#u9d$qs{n0Pqem66xJa5jQms|K)eZ;J9*hgFyUL6a7yRaS
zK8Np<PSZ@NI!^<<7nZG#O@UxyMwL%or>@7pVX@sXY?YlCs2rs|MJ|yn{v_FuHZfTZ
zB)JKgB3kE4l<)~e>j-C{!9m^eYLli8bz{XdhMN5NH{AvP)l7QR{<_Xa@{0~k31!Wy
zD8z3AWSxOX&JJJQ>+qep_=03qg#%s|WT-s&YZv+Dfeb~;)Ee|S7;6(XT9SA2bdt|J
z;d$T^32z^X5klj-x*5cgNmaUzfD;b|7!31Ur%<Md2khe6lO|#(U5SdIK1*4O-&3U)
znNY1uR((CJJRmkFWV>4-hPig0h#^j1Ol}kIJ^xX~F`-l#>!IDy-016P9Sq@)T?AOs
zLGIKUTXZs2#NL`PC(XUPLDZIY1N%&oU{<A!vg`r_57kzwPc)}1O-hh=emxrHA(7e%
zc|)oO>)X3r?L}I8WU4%&oFY595(>e8aFOKEtI!8eb>)Ttb1wQY<6qjsV6Z76JSpGR
zA)k|9Rjj=q6u2uBVS;X9KZ}FrT=vC5(pNa(47deK?QFYh<AXN0wBF4J+*R^{cB#=c
zRDoE2pDx8Pk#fN4s}5JS*X5z_;UwW|qkXN?i-XLiz)ZJB2*p3-QBLA{kF61Z`deZ|
zFWmuSv2a;<#{qD`O2}@<B}mze#g0Sgn7S70JC$A2fUE>Wsgrh$6C{iKq*1mj<q`Gd
z<@0Z2bYm7X?=mcsP?Q2i!0bY!w!U$QG&X$n-6@^$MfTq|KqxqE*AYU#KtJIwDkK&p
zs{8qhsvl4*UyDX8u3sxf%wm_-)lN!qKR~%*DzKx()U;fwC9|L<<rVzmhL}JAzd;}j
zT`W<3t0NsqK=@|RkLxbikUy#*R+tK*h+=QH=Zy%?U`*i;)1l9BB$mir6GL7e#z%&+
z_L%FA@Hh3S=MEW+cL6#v;*Zs%M?*3?e@H#djs-n)Py#Y(ciseUXtgmyOo6r!Dqo{!
z;R(=@mp|=`8R3N3)#>^mYA~}02}fvTIk<e$c8tb|jN!fc;V2!DHBB4UR;(D+;n_%-
zI9+5A#z;WiEVyk2gDxn)N13#V-ATJ+46LP**?3pq-O_#jqUpL=iBfxXqaR@%S!P!&
z9MeCLwyL?{ooN4<nN;c5wegs~Eb|pj%aQo&qo5E&A_$@>5(Sl+l|h+S12A%(QEptA
zeuq{4%Uz@Phk7$Jowzl?Gl`cvBQ8}8<y(Z8JW-MArbba2`STqR_eAQG5DTb}?ND`o
z{`v@`HV@&a<6pE^XDBhkLYe4Slp2>nAIeEv{N7|J5n*u+BRBbKNoozP<`Mivz^w61
z?IwRIHfrv*<3Q==%`U6LO4Im;wsYRiE$>p@AMJ?<V&C4U;9DlJQxO}~kjxBeBy@2V
zcBfA-V&vP~Xvx1ux`UYg&nSyM*NoH@(?S?i5dK|OeV!}e4Zly3jEAtplMp8?<t{b{
z;{S8o6i*VB((ypTQ6!m~1+3Yg5LjGW@l8;dj?_q$Y|};d=HAR9M1zj2z;f@~1I&*I
zjF}Ttz?V{x!j$h#H$azJnQ-Z(A2;&)I4Qh}TTl@m6C`28k;|#ejSLQi`M5_g44J?I
zYGwwr6aN{)*PGNHQJBiD5CjtW6?G~8f!qk10au@|M*NrA=KSP7eM)OYuqXR-c`TP#
z0iNCkYDD)LD5vdiWip=|k#gNi8J##_5aE}Jq|^<(t$P9^eRif}uH3oMWJZ7sI*ZS0
zRXFkbqzSwUDgB%jcjEe!;uk{xDkibrD;>fN7CPD_x;82VGu#~lzB!#$M+OQF$$b%O
zKI*Ol=or-)C+-%<vcu6JeHW`r_d^_zFr=zvE5RLvW?U>gxGmj9DU?$yLO)?TP)2S?
zaUwBme;pX;NyuluID^ju$pV}mZBlH*!~Uk>`Ze*(wU?U6V@lz+JYn;3PRz6`PS6i)
z78<f%%xEV4gq89QMC|Ta0Z?Cz6oaV*9+&{6xr1Ysw>G+UDJu2d`iiNqxMGX#P5#T5
zbPryr&|r!(3JMB5%J`2Ap)nybkaE$*6!dZm)KS3*>T{Hvh7)u3$M0Q{?}S%qoPv$)
zjbE&`uYCMBa@rq2S6xS!mY2>w1jPcl51O9y7_dBsIp#4>MW5ECHK9@WbR54jFri&r
z+QsYk6DCOdI+y~JnQV?wz)CQiBgD1Ghl1pc!g*?U6!#0OGb9k@t;b6wYF{kQe85wX
zaXnFdk~q{Tm=0xhM`%l!AlOsGk*z>rxV3xGqt9gSvl(42i*XOr>fF({x9*6QG3LJ#
zuw?4NJ50eWNFJo+TZLWU=s(MlsiE`LJBC1k*D9UgMb*)n+G+EmaL4e<BWr~jDj+=F
zhAW&OSj%zB)H%sK1URh(+^B{Sx!pGik$kjONd<#c=@r!2LB0Eg53}$!!&26?LMEA)
z1$>8&nye=m>gATT3g&A+$YkcrzcJ!$w8Y+oWf^*f20ki;M3G@BADK?ZN7WCFkQL7s
zG@99cPk5+?`tYh5Lv5$Qtnol6##t;3wGZK&56z|y3kg>JfkaA{?8~oE!n&kAYsbAP
z+@Do(0)$Yud#T6@6g+2heiM)9EH<H{LK=ZMEodRk8hXhW2!sMrPB~Y{bm<jbW>aen
zLP>?28ZAIe+MNXA-bWn5d|X1$KSTm4YpEIamOXD8dA=K#noty8&Qz~vHAG!b*e^Vl
z!SZ}s6t^!b9LU;NB%|b$JOa==F4`5Jr(Az6%J`aoxKSN$MKe;|2)Rn!3L_ou#Z``0
zg^+5l=QxwivM&*LZWS`f&fk=nIdQq5SDJ|IS_;y13(9xxL7Cswj=5D%fWN<>zE2GH
zH0Hh6{FL0z(f->faL#wgl|N-xC^j9zXZ-}HM!&Z6F_HW!iNNhBp`3h!K5Y>SmG)zO
zn#IGvwj(#3-ljhA-7OkYIwdN)SWUkk9kKge#^!W&xV#lERH@1ew4B?~wH_nmddoqE
zHbQiM%<`yVJ>e`#FP2Xo?l=M#fB+Pz9KMev^#vWAlyz6`TP9&@<Dny2O`OsEC$=0p
ze0f`y+d)-nJ!g0QwRHhnUW1Z?>>K1QQKb!<3O)ZY{1JRfZp8#ClF6PpzNeS!H$Tgw
zs@s+rvdrq!_m%!f@hP-UF>?R%;2)7vIKI=Rb{U!9#2=wt646qk4%e>VzX|oxh|zx^
zE1RdB#l_yK$~onTex}`lr%E%@hA3-;x`MDdX5-fiX(@kpMkp*&f>#my$VJ9x43WVK
z13CjiTA7k~taCkBFUU!rlFv!p{rE)DtofI!FpY>I30}-Y4Dzo=g{!y~_m{7_KRo;W
z!~&$8xd9{Hviq}?=SFyeAkAfww5EoZB`)fps)~XLsK<=fI)-uMgZCVvski(?eNJ3L
zI8>){jj0m`RNRwH0_)*|?AwwUys(1L$K#({_l0o-S{15^<AG5ZR=EAzO7XEZ#Wwj6
zE)n0REf^l|DiqA6+~I;{Jl;tP^R*OUSxw9GmiE)#(WTF_WDukJ1Z&hkLgG7@4MQxk
z(#=5ZS}yiuV_>k&5EQQ8Rs{43lg)bQgjEW-)8@=Q5pDi3A#8fr7hjsHz^30RT{H+t
z4wV}$0fs?624{H(I|<lLm*;{ABX=g#zeKbNw*2rj+t8%-(gshP*H;EnOa}h>&2PKi
z6)U8BQ7-0^E;4KiivWglxa$m96PIxQDwoA&Yqsew7}^N=9+aW3ylQcOB1j~a3D4=l
z0bM*0Ho0@5O_~Ic&XHIi$p~{(peFv%oW2kyE0JRqCctTl<1hdXN#?H<gW9urr7l!b
ztcEUTBbk6hSq#Zf+;br-rPcWzUsGTvjB2({_p~3yR-uknQ_ZSatDg$W+IxoZGiP@K
zf%utg@y$;5Yg!`ha?W)^A?<7_#%dv%S3WcqQ3^dJQdy*`P=0syT>V_dC>tZ6WKQlm
zgY+pZF7siHatAI*Q`=TJM{|l&jJHoTH8(m-0K&$5ry&g3en`DSL>|>cGW_MFeq(kB
z71m@{K0{n4$7(FBb-1^F$O&a{tL^T6YdF<AA^Zlt)z;bSU4&kEq$)=afe6QE9hh*r
zvh>8R!h%a^6<o;MQxHfvr{qe`MgK$^Q<|i`@Y*%n^f?s47(&o`5TBrRfxqT$s;{2l
ztoEpJLh1erhKS^61f*U>#c!P>wBlU<kSqOc%jMbV;DVaCoypV~e^CacpZmh>40m@V
z%ShLE1hptCNF>7k8gN|?)i~qYKj#bl^N@a4K*3noGX(9^k+WNtRc$p$a9EDxJf)gH
zqI8CE-Z2aJ1Wcz1^?c$Vh<qzPEkI7jKGI~))U2rE6q^x63a2#0y0?Zavw@aUD2PCt
z6G%q-{iDJ(tV85sA!P5tNSoUM>nxH&N?>1YyJos7TmB2otDsZ~{((X%IU8u6mGXgI
z6B)r)p^VbJkC3X_tpdKuOjKBIS!$C+ih&}Fn#v^temDdvsvm!$S^Ov5>3Mz`XCaB6
zNS1`O=u2hqL0+LhYwhWMl_tFayhbaZ6fKi#GL5+=0~YYpFghwd6g?9Cv>~|{Zd|S5
zK#&Z-LeTHhv;}l7puZ!nOG9!OdrBWs@|Uux1**o<f8b9r#JxU}{AIBCiupjd0gZtg
zaS8IoZ4)iJgl)P;%6I97w4R-XQ?rjcXBIuvs9H#r+BY^{v|JEVa9%FV7A|8=yLK)S
zqz3w7%4GP2urEF;adF13uR4Kbv1i(1KhI<d3mw;6CK{qEKiH<MOqc$0QVWT~q2NQV
zJ2T&0YQCLXVWsf{pVwcuZ}#aL<F+sA1<Z@ed0I(|I~3Dg^kS8gJ!09pSk0xX`oVH{
zB&mmj-F<KoB!#mi7<_luPfxhGB#*d*Ig5|uR4LH53$N7gP5zd{N1L5O-nMs*74N3F
zcYK;ghD|y9H4i>vqh)TGze=~*t<OetiTKMSmncDbaK`WohXL-w%hXVQj6~=NeXk8O
zk5Mh!QsrG(RLif9gVJRyzRwTDoO1$#UCYY^i;?d&nmvW2;|W}>Son%*n>F3@37YED
z6rdsG?2+sojPhUd@v=|doq|5@Oh-ydnR9Eg6<hob7ihrECrxf?io_MOaQUhfO;&-5
zhg%IXZB<5O<Mqffp#OdYi`iokU4{e5V6AqyZKJ@S_cOD2i8&j&z0)!1g`zTgZ?<2i
zQG!KRA{$M}>QX<i1%B)6!$(kL>(xe@K&?izbQlD%go`g;UNmS?2TTN6%9|#*(>+N^
z?BhAHGb4^a_AxI|OcYV1vKM<0xh$DwGN@J;wwRRRHr{L)j(uawO*nMpIU0B?%jDE4
z;e)KB449Jw?LI^|bBtdx{6+P2$(_p4FjAE+fo!6J2CJGM#Xy-HC%|(W-a!mzF(!Sj
zaZuJo<sdl4y;nPvDsYj)B9KgWlv<ItfCDjW=c8~(9WunTJA2};TB&m@cY|sw+94?{
z9Ee5BhW?<r1R68;MM@bnPTmBR0ROStZ}IDUx4Aa|dm0;db$IWlXfUZsceDmBv#`vf
zGwf*nJB_>e^tzuzg;K7K<-ap%R{1*fx<m|2e9^Q!u>8*R^zAQUa~mFM?*~B*-(*1-
zo)T~vkuikUV}$=|qprs*8#Gu+R^YRRPuAZ*qOlbRumsxnTe^s!DTn1ctBU4CXNAE;
zguxnW3(@T61TKGm|8fs4g+$eeISTL;nUhT(f2zz9<k)fOs!)jtUBJ~Wu+ND8>+63>
zXqd;`Uf~=eV8ix;mh=#pMI3zD*jqEI$Pj>?OapwKm?Z}MyzpXSP*N+DzfjcN*&4N~
zP)`i%9f8%eF4*@JPz^zj6;RDK^6(wlt$NgXoDwJe3WXo?@<z_vS-oyf_W^&U(MV#6
zSviB3YjmJ0@FYw+;D%qgBm(u{V=cfb{95Ujye@wnn}6SWxBQ%w_0$slahj>Du4br+
z!?#{-7t;=H9h)1NPuVd_*^C_zqe}8uAGSXU2{8(eVu>k;qr7^DAYr0FY}K!#k%nUJ
z>W7oPn6{E92BRpae*miok$!cIO;OPrn_@g?-v=(-zQ;PRN|sYv7&uV%=kmg10$$5^
zPx0T`i!&%YhOpH$LufO|n>(~+O>>rZIXcwp3bXy#+|d}gkx>n7u-L9-bJjByeO&Nj
z7p>(LW$y~jaK$=>*lU4{YK~EQwTqyB!Y%!>9erEe>SaaG^k=oF=$mAySTuhxwm{X!
zL6#YKV}^f=r(&W$pd45{tc&rT=6As2>HeQmj<l;I=)I@LxyA0(S3ZNSfYhK6U4udb
zmcSJ+7=JVux^sUs&{^RP>d9=;@81_>_*QI!WA~+BwW~K(L0O83GCw6mptek|l2vo6
zma1ZS6&Y3?A%>7n3xi&xgdED`y9UO&-y$YTJXBnPe4qdK(8p4R*Kp<X95uU&GERay
z++L05X^fIX2mLI|&pF4J2<ilohr>&g`9*yY`(90>EHUo#n2R)hVm!rug%L9;)*KVd
zNNto@Q+8#x5d-gISY<5U<L`O7lf{sD=Ph=x(Ogsfk<J2rgdHIjC1cTNrgNI&y?zyG
zyu&=nrbYI}%gxhCDFRxa=Nnb(3Tp6Ec2PCiRZHs*t3HqmVg1`B(-yOvGHQ%yGpHQ}
z>>;Fa<O}C@$iMs3k*+Cc(@)(Tj2XmS4i!E3E?2tf*4(e2{&OXQuVpdpps`K0PP+OV
zS05($I|RvFBY0=BNa~MJzE5jJijXFUeCDla+n%$^1su38P|5&TDOL5$=<gP9Zm@XA
z2NOJ$WCH@|qFIQI|78I%O(B$8aR>whIL`~fR>y9cx+RW<jiu%H2VL;NZnfoq%}O-k
zRlbby<v|Bw%-&D6RCbgHGY+y_35hZFN!Z8GP=RYUU$q5ldkCByF%7BZ7^T0cdQip3
z^`}=)HB5YjC5EpUhX4OwB?1AraUOQ7%BLFD*6?4Bp>eN)blgnY=yRXGhk4oYnEMGD
z^TT(f?&tN-K!KMrmoL*ogXV&_nPr%rH%;4!ww+X~u~y6rM-lmLb}Jc1XQ>{QPZFuL
zHKG(UKy!b?71tA=5SqlHP~cKC<34{W>;Lb`W33x)cKd0gor_ZRDnD_y81BAk^w|Sd
zXII*|5qxg9i;U6vmQ+moM>6{aSWZ641zloX`ac&5s<qTZ8;&q@jF_2+EWVTY#8EPk
zUo?E_=Xe7C4`FW^P}SD14KF}IX{4k>8U*P@cXvypbR#JZqJVUFccX-KiPD_{QqtYh
z@Qt<iInVo?=iTT1_7675WUevCxW^sWb=_myS$g_kTUk~b6RaY0HSayTJ4%4}mN`qP
zCYhN*!Aif<U1{JII{ftKm%A@n&1T#8ui8m0hOzLxp!sQ`J=9JcDvWoe3tlNzRZzVY
zupoTMyjhtZ6HZ8un{rVPhHzS+n9gdBw~SnuJQbHWs|+VZJ?Egj72^)>?NNji6`Bs;
zLMNrf>PI>>Vsfv>_jzNs**yzwi%UZej{)aZ<itD_$U$@<4iAY2etkXSp0aJPte=WF
z;&%`zM>h{+T8p-I>SrEN#t?{R15?E>=C^QY_fW3gygxck7QCS0ZbfM%q3XjaidW5w
zic;i(^5BKk0!JE?IA};$`>?DzrFdMJub4J{7>S*JW#FW=RRDjRy-}bdz-)}SEdHj$
z-XPsQeVKXK2wn)=|3V_GmuO$~%7c0CDbfN&8iO<;s@aDHPfiP?sqU?4Cv2$xhlfoS
zHtV~pS#k$0jdyKNfzCjF_(`jVKlx+}ZfG!KIhbFxpA})JkvQo;uH-xD;Eyu5aCGtn
z>4l>t^}b~jE%zrd#i~5*YpC)=D|LO#_EsGDbc&yw!{l`dqk5FFl!1QywolU$JAdHh
z%Y&JvSa`)UkORVm2iqf9N*mDR{O3>Fg@o4urf}4(v(cM@vb!6M-e3da1nYI1?}@1C
zNhlIJ;GXpI7FjqKB)@1Du3e(VKBCILfAr*b189$y@;4-6U<0ONc!r4yMZAoD=BOtX
z`~dO{LRLg0`VOyLR*i`J<XZuJAM`-84@{IfvAlL#ghz{`#yU@DuzO&cT`c+`&$aWc
zhjDZMz;e5;#Ul$*r{hQ9bbrwok9iX0QHo$A7X8N&-|kOU=+&DpwfV4{{PgaNWh-kh
z+Vs|-kOKD&!zMKGi*CL+kXuzcr%_J~k7YNLYlljJbJ9|HWpQ!AgwTg{tHa9yQ$w0`
z8`B8`Q_@1>odY@i)TY99Z32?0GcL3s?BNVv*7H)X=&wV3NL@}1LC{jLdRTV1<HGm@
z%Wv<fmD>bj=~bz+sz6Dx`PBJ6J!1!^wJL685n?!O1(Pds^OS%UAIBbjgm0i5-zyO*
zh`%X#6N>Q1CDQOpKR_iRH|3@88^W7))2xvnPm29n@hG6D1x;UG{wXf$TAQhBm-&-E
z>n+6JnKDAN@kkYTQ0CBY$F=^d;2Yzwp^;D1kBPG3HT+S&%OX|=872%QiJ6K$SjDm^
zUPDXZ2uKCMrz4IS@@b8G8DsLgU5DfkBo7yrfh4@7g+zb|{8mz2Ad?6ScG-9CF<AnX
zm%qc45b?l0DK7CEeD9hnZMaJh-!Z5bYaFpAGog6`Px9U+Z2rts_1<T#EnhCK--3x6
zdYV#80!@Y8PF*C^-V0o0+b-!c8UdSE&SD6-UuQ)}tJlTa&lr>O^Mr$wGn|O`USISL
z(qT^`Q-6J1;5=Tq85Yatj3k@T&vOJ`P$|QjN%0CF%(pF75>^iEP9pG{z$8T$c&`-^
z!A*~p0yv6><sw4s*3PvTPYxcDNg~3FfGZ$B^NX7rTu013Rwp9WJ{yb5W(s&{1i$qn
z>(mLC?}syiAv4dO#&-P#Ng>C)Jv!-WHq30X8AT9_MUEJ;oY*9)&H5ScJTWj9C7sOP
zs)6e%mi|aGZT1~8h|^e9lEgya{(ffg0^F~^uk#Hoqk4uLCq4@<<j}K6%{UPQ->^#j
zrUXl)5&+Ry9?cPtrF4}_xg0u!oMp182ZVr?4nxrHy_{VdnDoG*@rvAIOa1!9@vCCL
z>~CC#RsT@^Hxvwxo7Y?nFVbf31){kl+N9*b=g9Q^2(-%wktTvWuu=sM4Ec==oJ6W1
z)v-qYELLACgC1H$|LbCZAFvT>Mhq_}<EDtU`{~4mqXinB<ohSt5Sk(v`=~b$3;vWW
zy)`@f{Q0KZXULvGyT(*sr+Q>2h2eGanymNY=BXn*BNj1O(;q=Q=^V7Mef0#77YSw?
z9b_ok*d0Gklc<e`;T1+|dlE-c$r(Tz@ci{YbT4NWdF>AFP$o8CmRgYyo|S)pEMwl?
z>`i-CeivQU++X-;vA-gifST>@(HBwRBRt|mkpFrXc(H)pnW}-=35x>Jp`erNGTTR3
zSZPvpdF5c|#TTFs;y5fc(g?O6un>q1%LXZI(-^kA3uug{I89w<?&Xdj$blEqf-Hwi
zA;N10*y|7=mhPUVq5O60czA8`s{VjL<h4MStlkM_&RkjziY+h-)aw^+tsTC3H~n)%
zWi-yy3bCA4<IvLgK8cEs=2@-Tbg)H)P4r)e41Q?PqTpOB{qW&KFTo*N^wW=Ws<ez)
zbg&7+ncwq_aZVhLcA&$EFW0G&>kcPe{rvPxyUY8UpGydy|K}cm-_YL%(SD(cW^=?K
zT49@g*VBA^JikaogJPH-1S*TKbLV0gq>D^}zL&Zz`a)H^nFTU2RFO<i`3LBrVWbPZ
zOt8iH_rYN^Inq#TM3V7SRiu5518z%=g%N8LHZo_u1J6!W?ZJeVxL}!+cm`^pYZqX|
zZx~ml!EU{!=Pt_qpSRNJ$b(N}HA)00RLho7nXOfD3)0AUmy?aGz-drCgH2nvLB#J=
z`-L`|yh8LikM`jhTb*i#ki_uwk=$Z4Q|0H-u<|zong4U@RN##0v7C4%u*?z}d}yss
zy^%|_+JaaMSCTa-jKMfjlPgZL@_6hQqDxL``_ClRX+DOE$Y<~+T(j0akc0$sV5k4+
zRNRa$f%%kwJ-WXlZiClFCB?VB{sfi2wC7p1HjA>1@#VHShg(Q^AqL<H-)Q^kjgYws
zE10unGRpfJfp;)xCZKI|$c(Sa(&$v1e*MFbmGK;k`rROJ<X`vw>q96U@gAe1#*L1u
zG&=8Ae$a0$e?E{pX;Z1rVFfO{EdrjS3aJVg>hwDr$x{B|6*H)?V@><W^WPRKq{N<&
zwy3@Q9S8;>B7q_LFB4Mv%bxu85by6Uj2eNT)o*rFJowf2JV{(E-}cyPyx2{>M7@G6
zArI`fzuhQRMaS_(kLhma(8=b;RJF+=cjLo|58_y??CqmtYSmncSZdj)ve6U)-@^!$
zfA>TkUi#(Jg^JUG4V*Kyq4qy#OazSyLhtr)?{l{#{BWE15Op>%U+*CQv-M@8`(dWO
zW(Tu-q=1@gfw30LU2;5wuHjQ!g=bxvj!;G{Z&<Kc%DncGv|vrg?%=$2Kl&-e>wTN8
z-|DLgSgZ>B$@;b7R{rZkMdvD!pRG65X2<p=KZ~i&{?o$zQ6%Kx9x+qPCBKTLRyuij
zh?SdGn9Ucdl%_GNgpXy?vp+W?lTD?z%I{w91aln%LMdZ5Jh#eU=?{;C;S<%Ban66+
zn19deU#kXO6ju7I3Hg>Ji8cr^kwy@Gp?^1-|FWN-|Lw6uZD5|Rj&#%We@@ijCV&Mq
z%?}2-QKBHf(HV7MBar!zrG#Av2YWpR#Gc3iLkJco9h4iX{$X&By}>Wx{V*HfQ>cKE
zJ?4&)|2I?quW_jaWUGT;ZhehMMUjAA|8{1J;~&QVs2K#t&bBTg!=>383mTXeuo&=>
zkKjVP9!^Juot8V`7+!tO7C7mBZpf?TxRIbB5b~cp5;jJEBfQV(&@^GcHcsnFnM4)?
zM%UeGbh>Zg?Blh@%}nR3OrxojPE%w|viL9pR=M^51fQ3llR+)OSZ=R2v($=JDEne*
z!sbQo7h6iA2zl(&7da3<NrgY7%;0y&a9?bHxMyl~T91vRQ|_NJ@_W(hd4AI0sF=nr
zTdGlVCcySuy-*=W+t5Q2<_=|JsXv`X=SfE;DrNBXt#k#`I4rhAo@!U>Gt^m6#VVw*
z_s!MWoY(y-QYN>WE>F>R1`&h?2lM~7FJKVM+K9FY!)PRW)ux{W&L`DZB$D~vD>?0d
zLFqeZi5*%fCyJDloHvKBTvwl`qN$qd)R-p#J*rjj+VJ)HZc)^1olR1ZIa0c2m7#%F
zl_BW?C**S|9@EZD+w~5U$6@P5i!Q6b4j30_df1aR0gvjI>zy+Att1dbWH5QHC+X1<
zF-iHTuTIt{HWAphDs(l~3KbYYkW0#y3`5!29?J_R;dPRTrIFuwOZJ{4>gh4$AB-j&
z-WQnh;9+cV)=QO6!Va9=v9_xX;_cC#K*M;h7uYdoVENsd@w3sg(eV|)dleI3sMVM&
zwpMc4Ov`|kIUfnJ97yM_L3{R6gwJhn>zEPbfb`gy|6qAw%G?p*pQclypp-;}jEmK3
zy?T`#DI<<z408Xqnh_zKeBwfx7_ma7491Xb&y|k^YhkQyqg9<pLXlZqHi`Oe-a1`T
z%0gEXD7f_5OAa%Y8S(V0VwFbiEFs{`t|;NPyeOeqa4Puj%uANs>P3%D(u|YfFRC*r
ztIL<QRulczW&`;SOKt2H!z6vtl!0UyNk+agtnIgI_$>PLT|ww^4()eZ!&$<tA>-HE
zMOoB(Uu``VyMi&3d~W{8eni1rI|mj<kHVVEL_<w2vFLMQp!3-{00|l;_$N6?t3WPc
zyok$TQ6Cr{l|$=!qRF!stvo68j5j~?WpPA5JfkX|t9LkL=R<4zYCEe2V2rh52PDU!
zx8{S%luO_&8<+h#%_xU)PIAB7w-ssi%V@s0x2Gd~v8bvc&t@j(1om5<zY|jREL%j9
z@Se4;Tz~2<$F}ef9>xD^J;lfX2HT)+<h9RrwOiG0dh=(W?BVm1VF9`C2>+d3{F@W=
zN4Ycz@PA|h-kLPSRfx3^SldYMdnp=7mVa7jH?Kv+ZCjj!t+oNlP)dj9yjGkjROF)P
zII`RrOo4BJgMMt%mnoZXaxF9V7AYj<ciS>C@;;VCZ-}-I@qm+o>~u$wSNR2ZMxAX>
z*r-fb=<)cp1aMAoG39fEk;=_*bzS<q&VRLw3qU}Y3&Vf$h^qnOv@=sxwJEa0xIDHu
zTf;SaV(K3Pe8Gf`DCe{BN5DHUhlI0le+>EX9{*JhTg#u50oH*YazEzc)sIA`M>W8w
zM20Z#{(47_g*Hco07dxRWe8EAf=B)Oc2%lGqa@Z4VdES>T{H~Ly3ESAk6dfwtuK{r
zPHQi;9No^gR9m?vhT)vV5R}b3W`P0Kar<Wi$I=Wz;hT@96t7)MrgJ~|g^i);xFPG&
zru+{cTBG9u<q<fP5hM$dM`E=EP0tEt<D>$~B;FFfd-Gx$gW?Q0Gt^MfBjqS6t*;X{
ziC}6rn5-&vdnE3Y4rc)o50QEgSNz%#Lhlm$=mWyw_F`*ukorpqu1FquHjhUZ5W8s&
z#kl(rdRmFX&d=Qbf`h?qf`owPZY$3oDj$NhhI9}@GI_X$N6{!i=87S_LIB8^BS;fg
ziQDbv+rAWzqR3YW`TTBs$&pH@`01||037cyy<v5z8imF+tkOZQYWpJ;4@cys#A8nr
zq;#FAx`Xy}SCJ7UbfPU8F_EzlP>K?td!YS8tWbe_U^FIn44V95k>#T$jwYd}gO05o
z@)aDG(>PgapohSWYa)G?$R<Ix;+lU7mq{^%{Xi(Lft4FJT8Gnt9%BhnnC8<r>4{9>
zbKTjRl=J71c#<<;#1<ZfI|ep`YWoNHOBsh-u=~zEUgon>HK96tg+iT?EM&Hs44P^U
z(-`uj*j7$61-)GQ^XPULn(LJi5t+L#yFZhOFuC}JKXD9FN(yNZGtFQ_7{-bSI*9d8
z4#^bfQQVV#xW98PwbcN9|1e{aIYfkV{JFbW+O5@h81m=n(2yu;z1m3gd>H!$@2rO&
z{bx*?e2NH8SIShz1CjcB+t@$uEfqK~dN3d6d^k<-(E^ds$72-IcpRp!Vj-{2ccxI2
ztRIiY(<zf7L&YCO)zv(wYJjXofBQ5kGs%Q-`8JXheoJJIMg(U=jIWSw(*t04)X)5h
z263R`*VN;L=s&Ba(o>GDr%Kn0rOiL8L1%RRm!@bwOm@dSm)<Gnv1w%yA?(Op^1a!o
zbcE$$K6~>H?G@@$f|BW!J1lR1f9FHT_A)ugzIh~!ZS~R9R6H1C$|@D&7Lp!@0X;Rh
z^Vi1S7xadQiWBXna9EgL94u739~v6y4HI)LqdC@s80mC<W<~lMS=?P!M9f5HO!9jM
zk0XX^bcnC`4qe1Q<p2=8&%y9t;Vft~j(}U8{&*Te*^d%q3AX;Y2(|0ZBui%s@NA)V
zd0XuP(t@66qbiTL(}=lkNBvPt5{nxV5d_41*daBWnO-bX9Vrai4;M>*{L3gr#My31
zOpZadD*gm<YF3=dFJHfB`G!fq-iKzY-|pLrr1f%BLvgd0SE2{*{vGP@OIoVoa)yF;
zLlWt5>8~SRF`Y@8d{i$|>I0cXF5+;D4snN$gjZCP4aCCo$bV9`gHhjTy7yZM=T~lI
zk<&-djF~qi^C>pCXGo?aCJmTgx$<xu(}<85ll?y*Iyw&+HoN)YGxSK6=!fnIVxkTR
ziB4XczGOOYAV&w9L?mSb#+Z_z*PqGgM_A7}IW<u5nTz=isO+#}Udb?h7ogo|2LpFI
z_*Fvu41(l))QQ}53{jIn9~5I;zd3BLvZ?5-I$0mEu?+6z$mbF|{*J?Ehr5~kNhC|i
zN5GH^sVyZ$U{Xrlg>)LpZxj!gp){leVfj6+OohY{E5dL#rgb;vH`MKz%G@t1HK@^#
zOQO2Vm(ijpBu-OWyL@$@4?`{>OvgfMjs_{RI+AltM5GXAsakIJ)xm{De)lN}0wv3^
znK2T$vDY%q5_2r6NYFH}CMJ$UxpBoMF~Kw09%<ZmXXi+x&tfQ@(6K!r;s1ML{S&BY
zo>N=Dfy>lE-wvm5i+XeRGM_ZJ@aNG=7ilou=e40U?Ly_O$~-?v2}JkL;Zju;oNZ?V
zZ30FH88-Sy7rs~h@eHnWIVJAvDCxW-HdLYIV+O3PXR$Ef(bP?da$_Rs|Kg<xrvwGu
zXNvb|Ts@HTQq8i$C27U-wZ1si?pI&ld^7>DG-L^`vxQA!lQQ=JB!rP&6^J`I=+-qU
z7plGFz7z>~FVT_6N4ape%cos5IVd3N!5E|@s{Zd5T&5nK1?I4j?o7P4l_<)&f}6HT
zWvA#bQ_YjYuw1vc`)Sf4HK~A*e_G3}?BzhU0+P}KV)<WfK6=swRO6`E7rQgcM#$6M
zPqo@d-_`=##B7bKgY(Y8Clz_Ivkf<$D~pgS`;Hx=EK#151ONZ9J9rDJrw~raV}JI1
zEL|H{kLSe8PqMKfNJMZH{R}qK<(L_v9*_pCCz`dEsy^wgXk*&N6^NfV`=hxZM+a7B
zzRWM8iojBpf%aTtU<JGMgrL2A3_)GZanruNJm#{LP-HKo<T~$-dCr%L=0BD|ImKl)
zo_{TD!GUdtL#KqMZ7W=T4L1na0axD0v4{Bw#j5FZ$n?hkVsjem`Mg{_Jtc@k?__Fv
z%h9<uu=+x9Ur*Man*Ma?7zDYwCtv)b`d?M-f0r!(`oUMNBXOCz$)^5W+*YJn(W&ZB
z@@NJs@zRo#)^(dDWVC0?GH_gD{xK;yC7Ch>lHW6#padyj2EU9gc;(wuU8AAn=bCJ_
zgidNpq<J8i!m-6vhRW~|{bNL9G{ywg(cTwKxNXGTQn@6S7a+&^SUH5R)9iU(8cFLy
zaEw2D?C9;%N!X;D`^P{-B>R{kCO|S2cdSX^;r7(<1%g$sOoHSG>i>O}M4t?+2%{Z+
z%NLpx-&sC`FUgZZKmU54TBZ4UVDY);GuP7=uF2ox?#lTX6aQ<V!HQoVBpV+`Wn6BB
z_j|ajx7P}o^Znnq*uQF5W8m4xb0twh8KO<1{eGYb9u&g=BSi#17?Z;$p@!g$8iVPx
z^QMx=b^%aSD*yjX*%^S6KHJT6TTK*3*e*12MKX1+|Ko~&jsivIpIKDTr@?qXtIfh{
z9hMBa9TrJ~HJ8W#p<4Dw`6&<JPp^IC)=zmTwsW<zO>X;)#60%lc_@yv|1f+J#65tb
zq}=_XmJ;N1KQ1p{xDCE`s7r5tC&gma$ec4%`F)Fog!vjpzu8+n(w?7&#u_jD-k2od
z|C)$pX+*Umg%?)pMM;B%)iQM_T=~|GJJ%LOc#o056uo~<U{L;g^^YsNeYYjA%dNlZ
zV6^H}YB`EePie*<XcUrP%4n0m8sKX>{IJFlug>qY@sn4l`CXpC(v{AgSML>5EVp`U
zhb~rY!GEj*g%YFV;i6z8$Sdy;hxePEu1;wB(m2{{ZN_vM6H1@bajoPAN(1zEc~-tP
z)}$!1x8tqRz{)BzjX}^^EUn@Wh2HBUW{cyff%VJ#IJHK5kG^=pz;eCoB!PQdyxD5c
zV3MD2UlG+iP|Tj1qh}&I)BNW&ekVk%)PIsNK)@>7rjNLmF|!!=YxcA|rOr906G$X<
zu=Y2z^rmI9C=cbk2MK9>uFphktC#irQ&u-k4}Q`-74{Xl(td&cfZ6PBbP&9dG|0=*
zf0{uik@XL`Q$T44s7Tgh;VrY=9(2i=^j@eWiMFqP238_keKmovkoaXnT5edMmqxQx
zL$tbd*{HjG40ZimNf=7<M_&B2m#ShO2X)u6G}?vB1pZ9<0OHDn5A$nYJ$^=pb%VlL
zoB{ol@i_%qTfB1n@s4`%u<TZWN~06+q&e9*DXcga`K3ys*u)vj?PJ}SDjX-E%bt}l
zmo{D%hKY`z5J#sZH*Ze5Rv=Yz$HzmL2S26>kd!8rxn*yOR3FabqP5->akV0I>q7Le
z-)#t#$a$GiX<6c$-4Ax0pVBJPr#PF&ckdIu6GXxUBco`*gFuOA>n$8v**mmnuDb5U
z9Pxc|+#TlV{8(n`L0MWYuvD9Z%W9}!Z9c?!{QC#$(L&)3Y<U7Y)R(jT?qfJChI=aw
znoWmCYRty-`_$@0B^h)V+avnW1a5Bk@52R9TL$Qryc8Vqm*=BW&8u23s)NkKIZ+3P
z(s`ZQ^aXr$zJ2>fE&A=zc<m}j(+1&Ro(cln@qVOO>38+g_4dCKE86!Hz3RW2p7pY+
zfgZyf2MR$jdO!@ZAx>8rv6Aw;M}LsTkx7~RL2Y$H?W+zs+MDcCXFu3wDEHXlnc`J-
zfe{jw@4tBaj`%LDPySdfx56K|!OGZHCgTIgEiflFm)<BWersGy(Q?h>AQ=H$fT9<`
zVl@vz;9RbHX(sc^m`T}*zBw+N{r0zK(XhRV)q_{8)tai`jKj#4dU+v_F)1*RC=`sg
zBPbAg2uUKRhe3(`N{vU(U4L9pu0CVZ7zKF4wV*U%Aeu+%(pxB{$0aTh$zL;X)1%;J
zzu442`fn`&Fqs^f%V0%%UxV!BVtK>H+jvoKyVTTmpC*O*w^I^fQYlrC?K6ai;nF9}
zSA~)Yy?Z5eFs;p)So(X$gO?C5BotVzs_GE!rpw-fu6>X}5`Z7(zCUlm)<ldXhtk~C
zlgIOo%!KS<2}FVAqyTm_cuy?vNc&z2!VxqShsUJFE;w<Y#QgVELvcms%qw)2WlrrQ
zZs%Suj~B?tQv0%JRLkbUyTQ<#1OBz{c4vZ-{;-8Kxi7yZ?b_cD@&E2?M6I_yq$5Q`
zLu5=+q~9K|Zg4T)!*30L0+#rVnB-EVs3UFl=}2p=u(K%P%^h?pbZG4FC#2}cvz)o-
z;`O#~$Mq|B438om^KW+Fk0}v1m)}6sx#eR8<{twy|LV}$wM!j&-`LrKLf`=Yuu@?<
z{&~|SrS{T+C9PPR2e2jgd316c;Ym-1;aw57-Q&U1hQ-?Y3VvEvFuiXuW4~@@Q>!u*
zY_}fi`#Z={T%#}>G-((fE=umym*U-ZQn7ve=uA*xwSa%(eMdm`79X<!{vP(+52-!9
z;fLO{io?N~PeM2q0a-TdgpmtAVbu(a0LBA7RN|z6Qn)ft-22rgUu`jB|A3ws{I`q4
zQVIYGmh30BAM5L6BR85U(a~74Y0RzOH+`1v!vlTABBn6QEq}w!+h%q_8_R6=6wrN*
ziULlDNZ>^DdU|6LY8b7Mo5u!&XVF5p^Ku>!O9ZtFYaJ}LvmXCuOIVanA`Je^rC%xp
z_}yPg1N5AAq0M@;hF2fmd$k+A(PRIs0x^3})u|IZBbE+W7pk&k#mK6q!osz_Bx!(+
zTjE5)I8SWRWs3h|EUjQyy+~P7rhs}uhG8&&yUoax)y%(0mBm)CV7+&D*XISVyMk5>
z)7Z^I%MF^R0hxOMa%dmMXB)p7-sWZI88o}8Rh!CY4fk}Gh$t`~^*)cfBu!svc2|xh
z6JnI_t_tldaAao0T7`kv4f*$kpRH@E)hhLQ9~AJ1bbw+>5zeCtXahJ5(cY?p|J@vd
z9x1SGs>YX(kG970V6T{UMD_eLYhebCVO%jRd-Ww31Z;+1HX)^OBu5FY0Lx_eI58Ex
z{nhAP0Mg^O)X-J6fM?ni+#Q7YUp!9Db@-j-!>fW3fY2AlPSZ}Et-g9>;ftH5J}U)2
zaNZV?^f(V}GhbZ(P<8Bd>~zWF@f9mol@2_pd<uJkexuWm7{>6kDqIFN0ZzV79o$Eo
z3~#|-)<A2f^eu5=R`}l14f=6;5T<Is1@eiMcDzk*HvpZ3f5G~K`3R^6{<O;l_K0gQ
zqm|7Av=*01OM&$)O87n+Ku^DzKo1i>Z4!$C3^$hKCmPcHPH4O^Ri;g&n951U1LsRo
zz-c#EOBS^bc&$R1DOpMK8omT<CVqCaEcik!U7uIrztAyzcO!F!l||5XFFo|!H<^wx
z&G~fh#U}#cOlsLgC}R=wI&bl*%ldCk*)+<!#riFxP8}D#v%4$U0NdL8p@FI$D8rCu
z09;lr5dna_79e;JzuNu$wx`OH)_P;al`;jku||mAayxG*Y)=$L0fbGuJ*v14hPFI_
zFZmC4<S&i=!-=@$+M}K_aF`D|hP$ryV*8YT0qCC0=5PjSN$P4(RML{)gMs--=8Kt1
zgNe561Hzpe8ky*hw~N4T)(2SI;{gQ0Hyz7rBxE&LTjG8Eg8)u-Bw5?S5k@+a-1KT*
z!d7l+Yv)w;$*?yI8{Ov|_%<Y<aUzSMQTL0RZZ(XKw<1$Ic1$!K_uq~!GsY;HEiRR>
zgR~5j2p2)*h^7#Etm5%9gkP6XE5wWJw7=r-H<8$oaJa*0CZLT&9*&<=Bu~<hGN1X;
z`Fb=@Vx4NGNQ0}Z=OT@EM%<y-L|HT1iA&DIrB6>-0K=5mcxgJY+P0f<`^1CzQx=7<
zsnS%>!Q5wUtzm=b6`o(CHLTS6hJ`&Y!*6fzsAN;AN;r4V%9+z^6&R-&=n&;TRCVEO
zKbd;+m#1&v!~!OGbrBjiUjsUUjB$4|D8HeNq8*_HNx`AwGe-jo;s;#yQ4^fSbE)uX
z5Vf1|PWJ1U5&;IfvjIO<s_9G;9P-&^dwhGTCz6zDf4+W3c&1ES+4pLl;EPmxBQYRM
zHrN5TddHo+P^w+U0)T#Z4`_D=0LBMuCmMY49M9Wewo6dzG-wPHKZv-;f-gLg_R{ij
zMu9ar!Ab+yW`44PNxOii?Gw1(d6G^^Wb-RYxIYfOU1fzGQ6O^j0OA+O;c&Njd*e3S
zyzbvJdFMf@W7eVbRX``-s>kBni_KqssO%<}0(WyJ=eW?aZNhYqU1`$~3uPU%RVeAM
z_vLleGM7s{_luEVY`T_E(kl|Qm+UA@CIlfC7LgfoI(hgqx!<1@=p;$SvZxbv3fSN~
zL{Fo9mNX+P6Mkf|c%ZaLD(xuci0`Fs^eb(aFda^d@(rr0E~u}^9nq{}LQceR335Ba
z2_3acH5dS<4ybVRleIWy>q~Q#o9EYP+X+x10M$koa{<lpyH|0viffx$e$&FrshqOn
zd=`#KS<&oGp6AxQ-j|=dFmEx8ksg{{wtv4P74C^5H=<QY&b|wVTZUU3EB3DfTlTc>
z8;Ni<_y%=!0dTmZ5qa}tY8^8m><#}ga}2Vqd24h_uSu*C`x_6{W*ZIKHDb5uz`&qJ
z<1TZ($(r`)FV1A|+jbbc#UNE{Wg+7@vv?wM%wr6n9VQ<}!BM(W@T{8ax9(&x@A**4
zJ-miCj3pTKnB!-OdXY;UM1A)!#^tN6POv59bqJ9-fzqo#d$X<*1h!yX!d!UImz<oO
zSVm384BaO$kr7A`sKq_aet&;Rpnv@2$=K&%l;M}tO|HAtfQoiH-B2u1$$9_$1#XPB
zNq2aR*hiGU`@5U-p8zDv(yOyBFk;oME#6yfO(RxD^sWRh5vC4;gGnU(?SaO{-=1V4
zG7)}w84ddlagsv}Q3TmU$M3}juWIAp?02WDBU)q5q;ozRUmqIvh`v@9J@>=dgEI3^
z@5e-k0hN|C5`#`b@|xCm;%9Z~Ve1C<&8r@o6uHJ`pVP>4b!<zzDFG{v!bGZdl$}$!
zP^YyKT8KqRrChAQClamG4wU#_I?bKLB;pzS{@)DXcucJj1xBr$&ym$)*OVI~x?B5t
z?$lmw<!U8vI<gr;2}0!LJ3Li$EYkNvm3|}Y(4>>y@Jh-JyV4C(u7lJc#Ou9_t4>C6
zmCTJC@>4~OazW*o*%;%hH5>iR1!Z{dd&q_ZnLK4{OsVq)OT)zVem9gl*AsG94`WP`
z*X9t5sKmh{uF|n1wgS5`<f1!aMGn11=4!ur%Bl`RH9mvwO)I-F+J{ok0R{pkBP}Ab
z2`|H)FK^VUt4|~)fAC7De>7}jEZcY}p<Kd=DDy1Wwupctkfd=rwHdyN!%&v*wk7=5
zcQ(H_<#fL$X?0~DXK4VAVjPPdVWq=?d75>xRcc*03&*ck6SRG)TxGtP*v9^^WacF%
ze>OUAayGD=SG;vw`G%-dYZ((MFyHK61-42z_WH+FxMj5>rHUo#4@77thJaE-V4LuU
zU|SacNx@6^xma*t85afHjS=|tnN^B87%8x_lLXiQ+WT4Vkir?UZ*UbM`-cafpd<vm
z5LhnqMpreGKmnF;Z`lK7;8yq8z9qvl+Au`B+fPujw4}I*eM-LfQ(%$lK?98^hI$u@
z$d+!etJPv%%$|O`wARp#x)VV^!&TUNqREYt_2;0;!P_4vmdPJiRog*B&>Fkl7KuR|
zs)>=N$!+UZXZ<&bjlU(tA}6WJ%2@5Qh7j-9hx{PhcjT-4a}C<lpmdV#7b=ZHrtZ2-
zF0#B5L-}PkDP(}&NXzPCQ@%oY+hqfB*1<cPYjaEb)#|J~(|>}LzLH~n{khRoK_ZYw
zW7%>y)c}Rsz$Kp`QZwcaV(8`*?KJc%JQf2H#%Y$-afi)_t|rLHT+7V(F@A^6Y>&M4
ztH&ajEukInGx&4HIi&|D_z;3TaNfFI{8<RT?M<bt>8VR8=IP+r>pVs%`W7I-1ntTk
zR(0)*Ws;fOs=Fpi^khdEhP?61sMEDKXfMc~<v`&x_t=`Wvp&3k)v&mm>T$3^3q7r6
z`tl$I+=R}a(Z{zv2o{cRzrR0$@j6OHAfOjT(u-Hr0AhInBvf6JL4=OnprMFV5I=?x
zps`q;=BbY%pI?BqTIk`{*@rLm;kg}-IidwB0ZV*QmhT)JVs<7k219z8Rz)mGojmH^
zf+%!){i+ISO;n@fBgnH_n2Y^k`aAfu9hf`(+Y@S*ZYZNXrBP1M$h6Z`mJZ%m88d?@
z`V;CnTf`m;r<GH+%E@n+{&2CT%&j7-^N_m<I0&9-Pb&)j-V<k@K^%l&5rPZ9W?dY~
zg>4n0abqA!2ZAS`P1{`FRiM}@W3}~J5l+Epc(afOr^+$cKbIaUB~<a{KaL$18eRZn
zi9xCB-;&|>7ZKqeghOp~lq^I`UFU*w9F25{gumVpS;M0`1|4ZrnU!bJOO@Pq2bl62
z(qqyjy1$=^Fo|^65>E7n^7>g8BSAiKDt3r)8a&JU(*=mE<MeaDgd9;u?3iD@xhO1E
zQV?kEsgY)lXJV`_H#kfp0lcf{Y0m@IHha<3SjeY1&l$W)e~vQRf-?N1oe>RRnL;qS
z$2${(Q`L!)-@{5lcB;wykO2#A8|`Hg9F6c6KxCTu35RaSMaG|F5=UIRpG?qG;L$La
zs8*AURd6(G+hs1N<){dwW+`=)&~@>uD-#ri{H&IXMX&2tOyOIVjghRw;utqjMM!I;
z;ic!)Y&IXZ?LHfEx&Dp{R0HEfsIhbpRr#GD?_aR5<RXgD2Fk|28#xkJkpjVz!Zn4#
zzPxqe*F|Z7w;=YLXCzK^NUZdk-+|wkvHR6(Riza1=twMIeSn9|8%JP&o|i9_$$2;&
zLpZXG{>#8-l}vrr+(!fpulqUHG%Z{78K2`&xkCv8StjT}I#b-F&G82l0f~|VRjZ5;
z3}+-^q!zebYUOjIo!(5v8)}PyJw}v@Y$YzcBQ?7|quX}-aDOYENr6Sc&yd~MIq5av
z%C)+!Yrjc2n9aYNV1pl)x=1O8%aZ-{g9Ir($As26TWLmvA$F@UrC!FqM7KRR{MuA3
z9>(PbV#;9u_|^S6YS(XpIYY><fC?>YdjryK%~b+kdMajUlmz<H9&=mk8ELWV)6r>Y
z%yU!;mCRUj<p(HTn+Hul9%cW(BQqc;3}1n5(e>#RP-@$e$<w-~po3en7N`OY$ZeWl
z|K0?WkKI%Y7=c2;;&*?G`3cjQ$M^PK&#soE7En#-G&&WmwgN_B_e|kcky3_>2?U2B
zcm{F?An}|f>Cz3LP&gX_P){7-iJVtU{6}CpNaOc(3je=ykfHyRgWy3q7KpI9K5UV>
zIR7fzu{7ZG(m7!a`KnnkP<OLT!|Qrh&iK1crAQ=I*Qh&1TH}~0-;3<mxl!o`cfw-G
zoTWj#<!-Up$5e|YKOtTc=s>};{*aU=>2B!bbV7fG<#?!7uA^?gY?iLOnSNn5#<c&O
zRX3Jq5}%J;RmW_phmEGo%+ktjHx%OQFBmb;_3m-{-OWh*iNWDpwL;w|$F!iI$nCk%
zt>}zQ+5MScj22S5sQez{PK@Ba0v<Xm+#Y+$<m8q=V4#u6K{{Qet`t$B*Cczd=DNz~
zeH&VO<0cl!k^eRqV`1Hy^5kTG=v%;G2#XrRaI|h3o1SOm+w(#&(h_CSc)fm329lyQ
zjo-cBcsH+}4ClJe9}V=uY9rsqQRpWvP!9sBf=vT01d6fD1ZWD(fEr5iAZU|@R;9j9
zk--}UiC9XM!DrSf1we{_>;>AlOhNs;;s&<~qzg00sm|JpE@u)=v=ocWoq?o1DV3>P
zxljC&n1K4pUd*4qRx}W)ud4mN)W`u8J$V6etU%`?4;-a;2}=)#3(RA>AOq~fWp&G~
z7S|DR7~<3I%`F>HFxeDIV4X&)DS1rraG7TR)c^fih!8tgS3VPwv`Rgi2pU7sG?7K{
z1<AyDNvgZmT8IjNY4*VTPx5)-bOBKx8wtYOrCO)5_a56cCD!cz#k@9*d9pQUU%O|g
zx%kb7a?!`j<;DAAIpPcpol9x0YjmEki?MvLzFd6)?Me=T))cn>=3;`**Pmwx`G=(Q
zS7e*%O_`ng{uCaAw^%Gk7cy3|v_*S~6Y(sX&)PcO^}4oBzuC#amqb8bGIU)H+vGv;
zQo<!hHw__s8v!kX1Yb$FFpqGI8Zrj=vYZ%j9|%(4jWr^cnM?bx4h7zGyyti@uuJd5
zgQ9}}v@OX=U(`D+-K$*4OQhTV{45IT6qp6&Beim-Kx7xB0n!wn6D>S0hC{2c^=^nT
zy)m_mXdS5`csO-)B+Gp_lI!VE+z9BSm4IR2Tg5%jfd=3k$ii#b7ibgd3q<_@4zz|h
z!$cXGpK>2U1URxEAL`h|!+B<CWpKo>Ckq;JFFb>c$>3>f56HRpgGdB*f8&RktPzi!
zn}lUn=X9WJfVP-gN~rps5an0Yy-RGo%62GTCjE;iI5g3+j3-Pbaw!rS91{L^56)5c
zd`IbwN>Lkd{sQ&5y<WO_<I2TUquj-~;vxbLohfi1x>+Z$#8Rm?L?kpkH78K(=A&~%
zH{dQwKi-d!@NYsahJE!W<c5&$+3h~23wTS-pmTuW>^?cyQ(V`jU^>^@{CcH>(7Fwd
zC{ZiMo>!APyN35Bz|%}dFT&EUkhcS*klXL?{2X|*o8xnX^w;SI1Sd5o@~@}u4AoH*
z7_zILZy7Xy+9^*xSuuyJ>>#|A+UL*zo$7H!#A#GX^w?!@LQWb-YU=TDZfW|%FgVf!
zdb%{#of1hd%+lyAEE6#j^E^147c!WN@=0S5AWMBfau1ZD4^%CR&Jdecwsh3Tf2OpJ
zfF4ZVrAYA^5qkF5lrJ?FBYaKnzv}?W*;l{hOKdMbS8MegtmtSJI#7$XJ=}ZEwm*oP
zP(Ir!S>duNQZ7fX8&$ASa<(1Il{5p_!atVDgTIFrwd9P>ks>g-QN%s+Uq!9^GFrCD
z*qV`RDrIh=F-OqI!~!_ZVp{iU@o?<RNfJFipMnE#D>PEpEEU&ENU{Qu82o&`j$y#3
zecvMGKK$rXWS=?zHs#HsjGu&Eb!x<YS<%PZeojKQZ4={UhjuwqF2ski*G_;taeYLB
zwyLGR9a%Vxp9;sUVilQ;gXEYbA(^`Z<tU`X#wM&kE5d7%ea~hSaE)5Tx?QVzDzwv?
z!Rw|KD4nm<bTaRu1fhitL}s-xeu@=%N6%)!1xNdOEBgublh|!YHwu6+o(pSn4dZf3
zkk&r;BUg{nZ4V95s+?Xg&nl|xL>~O8VX%8`T`c&GEKLSF;d<`-(<i0WcwI5!>nB>w
zG{2>srs#vo3{CX{A1qb#Is@WAjQ&j<ITv5PZZ8c@ou5P}qC7^S@Q^8ZXuGyYRo25_
zx%tmxf~~Xz6b`VymsmbmYX-xXhHD-%uB&6?V&@=Fi<c6Wn&p?vOU)r@9q5i#jw|1Y
zMjJUKGCH&P-4B3vT=LtL-YF;YQV~2`4yYCtihJ4K!+I(p3p)SWk-q|JWYA0bj0{_y
z6lV(M3e@&vi*BoA+FeN`6etr|D|SXhEIIs{;>LfT4AJtTPwI&?TE72qObWqVdS|4%
z9TU<am&6+pDuGHK@!<l__$lLZ5}yI1D$UQ@m2<y%)&n&1cP{SAW)R+^i<oy&TU^OA
zLya=ntwi9{{;(U5dFWfTXPk>l%*G8_Z?WX6)~$2+CA}$4qc=|CcmDG{w+=3yvbtP<
zNKIFKD_BO@0o_Z`wb#}|b^l9^`C??wg<QmGU}z*$nVSEA5Lv4wc(`7Aa{ivDpc?xz
zoY13lm(y`~bKyGq6NoX^BX|}^)v1VV0m*uwoaw9r*ADAq;3?r%1tLsGeE&2g7#{w2
zObEet+$ccyH^;fY%ZF2#-J!t~Xs)6vzaK5h+!ykclxDq}$J+lYE$P`<L){lEb6Od3
zyD^<O#ZSVJefb$rhrd8gP@DBt(e9VVK=&BZ3DAkIAoU}<=~E$`vcQE=S8#E7jl;a?
zmH1Su&AI()aEGO7&tDSyd*RGwAnJP1Cbc+Af(MBWs`f70X(Ta^(%e_OT~-1pCf*UO
zfyp-}CGJu2U-{zOf8~ppIO&~ZL+uVp!hW-BM}(>t5p&Z~S~(Y%PmF8gur;|ruBcXE
zh}frnCfb_P`%H~twIrk=`Mr5Ef73D=*IqOcF&>uXbfm7WI?|9@Q+yJ9sj8?8h!#yK
z*Z1DAXt88umQMK<*AdBr{-E7)MNBrj<HmC1=Sjw?T0k!hP$aB-XR8m>GK;{P<o*lj
z0`L_yjOf*-vO)ZJS)AQDvqF<MwfIjZ=)}n-Q{K?Pk0gZ4bV{m!+`?Ur`m_&+8_qxu
zD%dyD-uy@~1jeE2Wh7Y|3QUkOU9OX7lWjJTsOa9AuNq(s1EHU}C)k8Cz-nGOS>Y&g
z#|?{4-i#NxA?S|gVIL<x!P*5=?&`B(vwE-1@HcrQ0nih|DlEojL)dm`<%8t|f29Hd
z5Ki<K;1vOxUCl!`48fj#U@Z8XoB+5wf(At?D5@nj8(#@UEi)(b_)qk}Xr(Vy{B#j!
zurYWbv?Twvf?)$6f&nQef@oU-c|L60D(J$J2<{rTjvlge2H;7}$W6p*@TUPSUT1=T
z-}K*$5mnfqdgp<$JGiVDcf$6s|H1Qrp*=815k-d#Gk`x!(Dxh{%X=F`X(x+bhaPpG
z_UG#2fYbMMwY4Qb>%tW7{c1N~OI+3ZCpDf?Q;ysACk62Lk2mLFdT5}HcyciZQ*9Ek
z=sUJ(0qMy$NU`D*UA9Ikfy&OIh3^9B!xAkrJ){7GQne|LY-}M98l{0WN4h_Wl@%cG
z4ce>SKLTO2KhRi2^%M}$tBAmazEM4ht8jo1jR(}(RzboGJuRTMCAUwhw_k__v1k5R
z_i(Y5MmmzDlU}l;{L|k}Zr$%;^GV^Upa9@_97d;b5tuoRx`p3&e*nK>%gJ@<p@2GL
z4zn1x&=Yc)pIIaL-d|aG95imW@Ns{|v|em!grIf65ODzc$1tcD_A*tqJRPp<3`CX(
zJD;@bV^EMBIEv*P=_$Y{Vfy3T+I-L!p<b$0u`|_}<$GNMnOkTsYPzSDTRTqoJm1;o
z0=*sMt6u_~M1VAr^yMZ{Ku0^R_e;*XLZlt|ZMFYPEWXYA=JW&5yrWQXqm2^GIBKA&
z45rVQ>u{v7nMgIf784VrHfVB*oB>K>DzpChPyHHiPtubc5;H!6tsj^4lwiLAJfR_q
zS$!9vd%r$kyb8qBy9|2zV#q26Fl)>wsmI|R+7v%KX4`kjE~A<VkS2VhVRlUlwfWWX
zWiW-K=k4=r0AS|>CH;!Mf*YcRV>J-Fv%mQs($OAs?}rGJT`6SnX+C^4?R)C6Q&L3L
z`qdcO!t3I|RYF#)SVffE{?{|B2uFC(er0-koeu{01oRT8A*;br0(>D1e6~(m;raSt
zNgRv2$<|Nu0-5990(-RE-zG2_zl+Z%g5ZW79>)D~^TOKNaUT1-LF&GAnlxWQ1)try
zY)>7J&90GxMGnli^?TQ2Z^9!T2STXR`*`}(<+NVeCCsEHUmbcJ3)Z(@KU$_%G)tC>
zOD27qV&+Z0+Ul|{dEe(KWVGMrnA|VciHp|VsFmm6ILvo@-$V?i@~O5FKg8iOt1;_p
zJB-ZC%T_d@uRS?ms^Mbl{h$*@trw{uLpHI)GktO1(`wosyYyTzk_U9lc;r2edcuh~
z%{)h8R+<#ORubFjvH&}rj)$)|F=F@Im3Fb^TdRpHMFzbbr+jK7s6~+N;$@Nd$$7L)
zhQ($em>y({khZ?W{m%Bz;Y2PQE!o7u^QD0L8^85_yiPw5zVRiLB}`wxe(eO#Ox%y6
ziw<LLh3U*gGEcdGqmAi?uyeFg^lyWGinf`^?}#Mg3?L+!wnQmQA)2C7d;l;FuPXsE
z(PY4$>os1ec%uS1EwXXRFWrEU*qv04dlTG=m9rt>Zd%KMvC!x&9i6#J^44Op9W)wI
zE2f_1w2CybQneg4vNUbHu<bB3QQBI!9bUHG9xsR$_VZ1ud@XoTdC^;mc;o*da2L_h
z4l9@~Sq2X)D1D&fL1@(uOTgKoIj8fxD;3@RKAcYY!3h=rKs3F&@9KXevNKsCxQJ~3
zt0Ct>>UoWFr_&4mL*nPdEq696Z(%efNH@-n5iBT^SgkA{*<2~udF%!XsU3gWxxSbz
znUI6mbbii`1Wka-s%`(VcR(K!@+>M#7j~CwBV;pVHy?g#PcOiMw3qbXS^zBb{#eb$
z`(^bz&vv0h8<{_H&BHtGEZ24J&9w=8py~1ALg?*ocjuj7872cEwE@xv0|RydhX~uP
zI9iqgp-?_|g`Q&jxkB8=`2G3Aj+lfLu`_eWERO5$fcgk$qQQK<HC7T7g<P(}*Qni1
zQ;CG1$ZB-E;sFpLj6YW4WdpwW_{Hn-*vpob{>cBeT0jPG=?1ZAFk17gjjnxn&h_HO
zLD^~$X5x~c9wVyngZxuErM)`vmPA8&JQZXNFTe7ERGs0HREQ|{6x%lh;qiH6!2V$p
z?7w%Bk`USUL)NUQ%n=XX$vQrY<m|%~x)g~vM?#P&HEM60tkBa2NdWP|3Zdf@uD0n)
zgWb^v&|(5#tl8-F4$J%M#LQ>!JtFEFz%pVP)M>W?09k2)uUbkwZRsugNI6IB<C%Vl
zBfRC8wnMbxY(i^GN-7j*qQEGi@57%QVh6>|5&FxnINl=bgP&i-0sOH7MYpNhaXeZU
zMSgao#lJTKxW<Z}?``M%^V&tVZrkIMo@ZMtGOUtz4WMg7=a9yj^bh)WQ;5@itySFF
z&XoKEdC;ANT_dv&nRg^mTW97;f#!<{)6s0vNG|XDtIbGH=j3Lo2%?j3sH|H}E>aU<
z_|~uBA5`Yi{XsXuU*u&!P#>MY0v$DRg^FoAHV8e*yF;m^jeZZl9%p0H&*Sd^!WIY0
zAKpk9tiIS&rd_r3Si+z96_B{PD2hxb@w@KCn`8h(9xI7FB8{#wYWKZ%=&=6j)xqdo
z@mp{bzpL&;JDkZt5^GW<DgTa9j1=0?3b7hZOn);-aqeo^ZUukef|!U(fur_F_-2oz
z=VtV=pT&sqLkken&mVhgbj|{dw?MN@%acT(SmwODEo~Q4(kBBa$(erYdUViyh$6%a
zBfH3>>L?X)U;ZR~e}6Wie9}jk_4p5FP<vJ?mkmno*KZxhZHXDIy6<ZqKZIYI|JBJR
z!s-w9&a_>zUgL+eZ5|D>GFAEZmV>B-ldg~)6z_}#_I0NIT|rOzAcYjywW*3Up0&~Y
z)@qAQw~}lTy_x!@IMY7$tzs7c8n>!;8uIqjCjBRk%y(_XOxxoY?<{+Yl<!6HM>0zC
z`i|^uR{5ezioXO^yI&{~{^{bB)|f-ix=2b@o=)=da_hH5Y*b<t(P(qh+@5SL%9qQK
z3PQgW>uQtJBu_F4#_T^5HInZw2vfT$v3R6Xu}XZ{{{-(iD5SiIh&yIo9$zN4cAGQ2
zIeU8ANV2ih&~`#px8-CBnPl~-(ACC;T0d>rq4zRoslWCk(^-A>t9gsVcA6>c>6nks
zg}YsB+6C5Jrn`mc)h9DQcIF%Nt9NE-V)-3>J!)opVxPN{u`QP?-B}c9*Z(=`w79FQ
z^ecZwyDII&G&2=klj@oo>jqA=ty6hAkX5G5I7GMQnpAd{+%kRsVz!90`r4q-n<j<(
z*o##5{i!tCwcPZ5n^*-$h3~Tc=$6NM=UHUSL`j{%ojym3pk3_!-AAqceBZ7@vInBS
zTY2DB0s{G?dQWD{&b$16E_@D#q#AVSd>S_+HKG;S*w^FUX2qi&F(}P@x3EAQlghgu
zJXY;AOLa~o$=w&t7Uy}{#L_BsCAYXgr*(Zs&bZBCsun}fQ(!SOlYf|v9m}VvTYo|2
ze)@f5n&bE2O$dkb@*WWR5@~fDjiP=wMq5V2+<a*>ln*-R20)k5!JnrP2{u(je-w0L
zZWk}+MII4&Y!>=hzssYQ$E>W{T*Fu~=GJ^Glw@@yM<Nc(sJ>aCjg4#u1suyF!h~S2
zuXqcxL76&h%r+U&&4Ki_fp0`2!UNvI>(5@yn;-XK);!Cs>~kyL;Rxbg$Q``TM4l1R
znvP@&iYdaq1m5U;l+9nN*=>KEVp3Nf!&5oyewhOc&H0%M;$!59h0Yfeoub4LDiLDs
zAWw%H^PyNE1=;ed!%N>^Y}JFTP>qw!uyL}z#LIR#_|-`C>)B&OM}(gtGGEepo%tev
z>{eDHi%epW@@-i~kqMT%etoZ1CQR;g7Gpv-7>4`i*)8|-<M#H;-(is&TDw<3_Bf_e
zAsL2`VZ3ME8e;fp`J*IWI^5ptb}IH?#26${3eZg9rlU7{rT<Ia5uH{!Q@<S%`s8w}
zA5sVRcIf=+;7s-D^^;+$mc7e=ZN~=q;Yd<m>U}*SIz_{GlA>irYtrXOh!h-_XA+2|
z*NiHh;sLdoxR3jp#M|#RGHvc-Iqi?Dkt|-nzTmW-ud@rgg&xiXQw<GHKc6}pa%1fq
zG}0RQcxq8FP&Raa%*y4dX8z(%9Gpe*!a%P+x&+mCBD+*FF!e;ghM>T+qKVz4AM+Z{
zakTi;dksYhTfVoOXJZL2y@o`ohMyRDAaA!~R#mq!-64aFW}Uem={DC*QQ!OE{tMw<
zy64Cj>Xeu61zp5~`h^O<ce1RA_w3faXWp$l<3<4;<0u*i8YXQ&KSxs7mHO1Xo1O*<
zn;nFguzSsdSHhiWU!CM-61|$TIw~Jb7BKYr=_Mgp?7IuGsM`^$Q0M9y(Wk#SSd7D9
z<w|5ZY+;>s-iVDsxo@5P8aG*NpgeMXVv~{Y_$BtFt2ZZ>&Ir!x-YLz|H%n6&cD!2i
z!{NdlokWp$f&YrOxU~GSn%TKGuf`*CH7o2dCj+;<)sE*zM5B_Xpwjj1%AF^}OBQe}
z)Nj0`_)Q^OP3?5EVaK3$pjx6<|Ngd(p)GuGeq!}mn*b!cRiyJ<WJrxX0c-E%ao&2z
zqTsbWvIKf_GY|Y6=KYSk)l^&9FZE?ejCBL@pUclx*SWc&3aO`IrhSP9OY><0SIO6E
z%RG(vo>S|s#kM~f?Kh9tuLUDO%b6e)0oN(PI;b}Z)?P)0I(+nzK<xqTB}77;8q;_>
zna$*EdWDi03r%$aXhd8^9!soda}!bl@V4`dKdub2IGYay!NlJc;H}fxf?Cdq!=qL>
zm^&e!+mj`6k#EF^OhI~fUUIo0F#_1Pjt_A5N9&*mYUblDoF7^Tg21<*mkPh_0M5s$
zD*>2)zb$*qeT-19JRA$t+L~`jWm==SA2%~P2}Pb#^`@hXxB&iQWqBQF4mkD0(V*g?
zv{d1cGzCt)ZX{bVPP|Gyi7vvfkx>G^WE-31jsPMg$-?dwF5MBr$372t7b|><zxgCK
z)sykkSKQ0%0nl{a#C^Yf-LZs9#Zghx_0vY*+24_t@u^9+7GZ8}`SPHS!==9s{>`JV
zSCElQy-wGV$D*fvm9L-OZzxz-;aT?JLJ`is-Bacc-zu6I`ddik^YX0wvovHnzR**T
zRCGqW-A{HkCLswcd;}VGTs!3U5NiDqPwVztKpuv<@ct4W?=SsQHi5W4Xw>7J=W)QD
z<PFS#x=9$?jHwh38qf+W5;0jQXT`DxhmRkwtk{l?aF}(3kaI%yzFKAmanMDP#}&RX
zN{Do5+jXi}%&1{Sii*!;RI?o!w&Qz)a<of>7+P73EGALEvf>yCZ*WOyjQ-F-ab0Q`
zX0cu*$+UiYu{6D0p6>bCYyYDZyPhadelitbuHHOBS3u1b6OY_e%_2BeQy;I3Xb+}o
zd5MzLjezmV|Bt;t4W}~v{)b^xh7cu$45^g4%o(Bx86rxNslqm89x~G)88XwZEtzM^
zM&==vd7FudGS4#4?sbvA-{1eZ@B4Z2ym_AE_`T6V*L9uKI@h_*wLa^!M5ea2P<5|4
z101||X-LEb^9SdBvX>Vrmt-fB0>*^J?k&}rEb}_EGxrd)6&iMOtf$sYiQjHNjEmx1
z({`Hqo_>XmA}xs?_pY_o%a8BMt$X8<)Mx>##FKrW?sNxWHy)WoIIPz`Kf_ercU<U%
zj80f$U84P}J|l+m#N``Gkr}B8#7v#(nag!)c&2hI%e8-Ut;&@w$p2vckG=Qmb&xp1
z9l~~>fc6*Ts5+bHlQlc(>GI*6MStGPo`MPy$vg>;W1{hHL*M6!CrAbCsdkpiNSKLF
z6QCB4Z#g^if2OCMovq4d_uqEBbQI%4h3lIktE86EFskZ-eC6t-+mHdSst|#X!^b=I
zhSb-ONv?XZip4kenA`;3E4;&TzOP>rU%luhH=L*xS&z?29rNV4>UY3g$F~y)0Vf@v
zt2eLT<dy#Ol1rzr9idwBmg5kp@fMuwy;vZeOA%_+b^K1{nKfz}$cX#m&?Ch7I=@O`
z+}AXt))e)ea-M1GP^R}!JVq@W;`gN9&x`N_1B1Y;Cd-?s9$%7x|6-PXi``#LFr-Zn
z$d23I)N<3DB9)%UUS}fuAm^ZMRLKw@pP80F|F9#KB=%mGuTbrup&AMO+$Vyjc)W&y
z%{VN|!p0p>m9<=y`Qb=fEb$Y98>hdvG60;5CZr)zpxEMwVbIza;ir6s;0-z6A<qQh
zz7?v6gVvOEnu8RZM@-3O8FTP6y?gz>!?Ek<@MW1mR*nVlECt??jZ@2vY1j)H1uadC
zk$zLhIp0>f^GX=JfUU4?ws44R5WMj`GW@=9G?lBXbCxNwPA_?lKerBu&AO@YtM-~t
zd9+!}(J@Sn<(tazcYZIc=qu@$@bJD5UJN&$Ngpus^Ou*6V2qE^Wso_}UJ$bIQqd3f
zHm~L8(Z`A<X*<ejuKZW87%$H&qebm|Wn}}`|DYIp=L9Kn^KsNvp{F@6o0bGo4KHUz
zIe80O{MmvVRP7Judv&?X5~qn!%JjHM9}nrU%pUvki>->M%yNf|Nq6&GxWySMT5S~>
zp5K=`s}$XS^Q5%uN$!N?#;%;x#aTZeCJFh;i6;w+y}e-mI`JkaL2}v??epxvJh$!#
zZb^Fh6YD8X$eAOz<?~%TU%&lwzxwo0Wx-Dm63LE0yosU&j^;4;bYBg*Z+_t2yWjdK
z_%PAcFAV0GQ@?4QR6B<lmO9VbO?-CQPP!_$`c3plMJ8JhN?eIvWU+>9V`my*G9z!L
zmJ45rF$ot|G`QWATO6+2HRwMV{G?_cA6##6<6n`9&@!rGD98n1#xHY$U&0+%t`afH
zL~_dSLR#AtDc=c}_$+2x2#n3)p^(J$%SUV~Ukl=W#7!K*;9H_QG~_;(E#f4^m57h>
zd7n^|{PT?ECx8yh1kB>SEmCt#c<ZU=jWk$pV#$UeB{BTTnET6w@sgKawy(tOSH}$B
zP`;n{Fv^q7y2x#bS11#KzKF8%O})}CS#_8@3-8Ne6+9)pN_HAsx}y+OZ27E^*!o~*
zLqCQ6zN!+hC&`(s&!UBI)7TunHm=9u{UNQ+clBli1zYDE*OyZiAL@0UHXO#OM>jE^
zI;3tgxfsX6H2LY5tEfp3hA={?hTz6;XKpe38!x_{n5|@B<QNpU6OlvZL;8G*txit&
zYdq99lwgW*KJ7%5fyeUg!l}xl0A9%QbnLd75e4*w=l1d)sWWZ%S<Bm*%trX*c)y;m
z>p1;Xdm`xUEL(o#&@(+@!ew?p?m>z)nHMW_;lbsu>hsIBafEGuk~qYl08hc%hll<f
zgo$x0^oC*im+%a4@^|75_MtTy+0Nd+sfP0<^`%v&=lrnv>GT2_XYe@grC*+vlyt<T
znj|NUyjP0GsqRS*!V8UFOY``gw-O7}I~U(T-tyduC1cigm-X0<r=7-;ZA+g8^G^;o
z_TFy4XS3pzqQ&LP7d-ofj4;q`x9L?e-AU*HMs-KKlcD~NsuzvFDZdbvV%+7mAO?|^
z*01ey)4#4b&)@N2AwY3Q^WJ2fZcW2{fNRu*pU8=LIXs@l42P_1(#`6wyiVk}LGt+e
z_s-=6RFj@dGuy8ubqZhKu>b1DAw@oi5YWf?dDc85AaQ06rN!>5-=tkV-nVnd{|*;B
ztwp4*5ZMQdAS|WDovCl!ir9DJJDa3)LBZYG8SUfuWpDMGn40<#V$iS3QxCno<>HM!
zyEs}{O3$pmYU;c?SSu3x@@1LeLvVvp;oNTy@x=Gu!y{B|C7`gUpBZxJWm7toW~Hbm
z=<9VaYcwr`gN@DnYFx=V3NwQfG&gVi;jYXGfz)+~5=!Yh_U6ggvo%o*mp+KUgeP+Z
zp~xgW-0G}2A)6QGA|=p!in;AD6Zr3Gj!fUJLJJmtBseWc#JC-z5kPwGdq<k4FceI*
z@wE`R!#7v#&nn@hqVV-v!6onIUi`a6OF|}DRVaPzB;I%8_oj`96Y&hWZ>J|N&l~w|
zRqJF8Hg-CW9>Lsq*KMEgj>MOLIM+Ij=S!+e^U}n;Si4zvGKrS!4F_#kgcge0SDuO^
zz6pRDIcS%V_@Fno-!7m4T_D-To2QT3Q>%dJd$;4i=BcaYeH4H=o3@8A_(o;zMM3Tt
zA3u9Vw|V~y^`Z15b*KS~aO+L4>|z7Hu#1{w<JN^6o-MKpM$22n4K{-m_uMx+$jtil
ze0)Vx2SzOM<obIHI=*h;|1>;*tAG5Z(#F_Cz@6*84ik>s7w|HCPM=e}LK`PyU;h{_
zZSjV$V?cHt666Vbvtqq;=(ls?{HLnN4yBt`q}jqiPkp9LODU^Z<G{E*p6TRJd;U~i
z?7dpEr~{X2im02pjjv*e7PEdgXT~_i#&$<6q=u$*w5cmcapok9Yw_-0+YE3NQ^54A
zXAR=xRh$&iNKuU+IzRbMuY@suDg^)8uQ94(=e9$yjux3L|41~yD!UeactSh<CVqTJ
zNcl#PQbJSYIaY{7{B#~Kus(i4PdLU0jc&>wq*xu}%X%|atI;8D?`(|`9a7NF7N_+}
zoqe==tDbiD;#Z}}(BKNj%u`+sT{FvNE?)+7mOY3Er^EawZ5;WPa*F9n-jcMIe>4U!
zKM4*`Sw)9#rTuVbrYc@$euT|Q4H0||>im)PR7vq`1p4R~o^o|;3tu8nHIb)Tr*ay5
z3>L`0*Vf$ZgOqyzBVQYi>KXACzdEks{%4c|mnlPNe_xg37`f-&(?KmGNA=_$5@0Sl
zcRD1!q<#Ik?Z{N_XX4|FpG27a_{3zz`NWNM0P$wP9@BAxeftCdg>d4)SiK~OEV#7a
z6k@3v6t_qD$rGvsa)}YvRyi62NK{hpO)4!rSn6#KS!u761-aej4qwE};Y_A_Z2a7=
z%Mq_Uj^p*RxV=@n_Yl}ik6F9PSMVl2a}cyOKF!CwarX9z;vka@9R(Yma95`9)Mv&S
z<1Pkf8{&D(e(5nkm973?bG02pMCzZC+jO>N6B&1Kpfg26BTf2=P*%H3>%_E%U$o!r
z6N0e*A{6wmCnP>4d=YpB7S=20Zs{14QR39?NrW2>&wmSEZHZ}Ov^7~d;XPGief*ZD
zV=i83rb!zoez^(}(@%v&dX2F1SdnhqwedJY2T{cnGEC-M?-ixr5O!F1+;SYYOx(Wa
zKm6<IrsI8&<CU=&+&&UNoUKH^{At>eX<T^gy2G|^vEi$&3qw~zH6D)F_w2S)8BMD#
zCdNF`xGT+K6xXhtE{L-4M7JLw@wFN+e|fy&9jjN|_O*}Jav!APc4Uc8#9z)eX^2ah
zR%bn;RivZ3KHp$1#3QRTLA&fj)iu6Oe=DfSmhnjP9p(jUM}mqBN&SY_lZKztBUi4h
z;uzmlz2|*Z^Ay|QC#dZ3rldkS*_?QiIlxmPvGUC|rbLzHW3NaB#~#iQ;_B0~-^4J_
zGNcACS-4*xoc-ySde33@8kYWqI90+?QudcAflOx=hkWCz`DcNBV4}^~#)bbgz&>9#
zWdLE}Xl2jW0Q$3l*nTx5XO6jkE|(4`I!Hl%_^9HA<3AP5Ib=vR@~D@M@eU1=u^hv{
zY=|4AG;Hcv<$@r-uk+fc_SZfS*ZLV|uK~P}>{<MhR!Mew;#GVnP^cT)uYbIzHI3s=
z5N4(hA^lKmFxYrFP10<P|GW1s@kE=?D%%&g0*jse-e70TGH$7Ao;xyj_e$h#=R|Y9
zdj%@Z_;-Ct&yO*py!B_rox}&CGlgq1=PoH|X>Wf_E*6))+#x2r6h9$HKIlJ1GkDl{
z!|>iGt4?Xc##2dx=>U>@yg1tA%7Z7WThVf&`vIhtuohd{_TM}Gt0e;e+FH=^c&ltT
zo{+B{Pij6rh$u6#AWWFb5L8-<PrhM9DKd!&4`r&;dg7HPFc4F47AnRrlZXXmS@f)L
zoDs9vdRC$7vwIw*)mxvkiUOzmh|Irlpx9P3ceFOm!mkPsr;(_bc6iM>O8wAt0pq&K
zPyKSd@2GE2oqBt?+j*AAWxMxwPrA2$Cu9ByYox|YEBo`c6-F~6FXm=XUX#jyHYrPZ
zc-_q+Ib*cBp}Q_<s{2g-6PMBI%asgltnAge^jn<(xo$W;`84)iN%~|mETKV)D`Pdu
zS3xd2ek>!QoA<boT<Lv2XU~B$Hw7F<@|0OS%BI6$QOH?tjs|m0LFye->f!JB7?+Hg
z61UUkn94G?G6Ht*O|9xOqNuIiv<F?Qy8Qw3o^q>()*eB1Ll6@_g1OZF_G*kkWi})t
zNgQ^4I7d-><(F$XBk=`ahs$z<Y&zu2pH;57IM9EYO6@kzyu;s_DIY}T_q>+oVRCYW
zuoSfnff>qF+&fj+B2g8t``2niADA`D{bb|9F<^cTXm?}~P&~sYj`O8al3{bLaUgxh
zpL&SI0CU<rb0P`XEoMmZ{<@r=aR0WROiiun7}fWAKu1m}a!h%~(|!N)lExT(4X?WU
zL31`8Wg~aYqfRp_mScCM&94qA@Xu4yq-v1M2P4!XJ%f82Z=Bj?9HU`5lj*;xa)#N{
zX*T`k)it~@#pikQw+MI$X9smIHc{HBR{Z(KRLsKkK6IU_GM}`P77))BoO0$5+%eY0
zkA92Gw&N#y5j1K~`JrnsjtN~4>@4?i>pwwo)S&BCtY7WcpBj>&%6LMYjVBZ-bd)8@
z91vQnoN6KPDlujE@w@iWw99*nHs@_Jepkgu#BH8)2UwT4j4D4qQQZqUwH6YCY=z^a
z9+gOrx0ujZ6F%6C3Uaum`!USeXySA1S+yO7k(i&c>tC)B;zoB0ZiJonQcw8u8Xr~d
z`M5C7M-B7m&wC=&WxChZKQw$~2v7ybTc~aTs^p|twUdjcS_AOmym<dD+`j&`<Bvbo
zfxmYYZ!?KIq(ayX7gBHHUzI?UEUeewe+gnwa=Ev&b$e{NCN)phf1?$vzT*9O{1z~8
zgpss5U$8DQ|9w|;h>L5GVoX6^!r(J8*3<oGF2QN%H*lPIpD51Kp*(6{_zEvMlQRN0
zgjUR}b?E$ZA~k@Rl*FKEEm^)lBd2__@F!lBO;cQWf}ogvzDUQWfW;q&e#5yxqIjfJ
zWSC0|`S-27B@&BV{WI;ejYm&9j+!&$1>+4STUa_&p))SM=Uu51GW;O&*2b~aOzX+&
z6EMj2H+OqWXdcJZ=LZi(H`<S0JQB&-_A61y@3(sBv9mEsXDAg<G1tE)wr8ZE<l;h$
zr=6*}sZYu7=wKs|*my<|J>#rr5sPxObhF-MSi#P^WeqG$Xif|3uCD6DH2>Hwn{zvg
zF<z4{+qy6aQSYd7JR%fs*LJ-rdNyLV*HLgs`a+rMpOvh8!*BU3LU@@(3zg63SafY=
zVw>W#G^4NL-ux-=>s{e<Cce8Cr<~xhE24LK^2O7^Mk+lcickck!4<W(NQP^p!fAND
zREw6K3=9uEqL-WG^Mp^9oVoamis(!?`i$$dxu(S3E1Akd)hLGN+3jVq^1&=KzI3kR
z^d}w7Qf3Jw#m}_b$gEPfOG(PUz?ruhJH#1dHpU?I>by(n-jRB9)=a!XbyrpwMKRIH
zAmv~Z<oQ*D5Z#h{Y1=-=IAPOzf<in-FIfTTx{Ps9thl+OELP}5dybMXD<j@yCZ>f#
zVjGR?Yi+^_+CPg*PkPrG?MjuqG)wzZ@9}r)9Y>Y2Rfs)lu1wdD58?H8w~UVb!tQ>4
zD%(G;kD~J>Nhy}QQ+<vyYN>_c+J@(N+-e8j`K6@m+i*P>Qt$n6|0#swrt(&AcxBJM
zTG2u!PaHL~NnY)Ao&qqGO-}d4DV_^@PcpICt3IJSDyXXy*w0S|V-6)52vd5I2)lP`
zhy8LJH1kWxGC5v1T8o>@DG!J{+x)A6pc>`++<>NB8og5l8RvN&t6BpyEz<?_7E1$a
zWqeDtKU$hEZ!c+zX5N@$jA2d@?EWk|PZfLZjjOiJ<N37R>`9HrdE+k|bIG16r<P8U
zWnDqlOjJ0>jbv`15ZXZqf!4pjlCmqEpD)^*($a0StSep7kWzLYXHAOJ2$**xx<N*b
z(cTb&AH8Drudag!gyS+_0m3@J1-P9gR?OEnDax27>4IhHHDOXRDzdiX9AbfDwb!`U
zt-I9S^QV?>f9n{V%H)pmNl<2yWhEe`D*OzDxT@kbiqW1ECZoT0Ls=Wva86lP@w)@N
zJKgKQXKy->zI}DpU2_Ag7kgVe_zPc#xuS=G9$hMaB=Ms-?Q0BD_hdCG)f`G*cFaZX
zsBC88XM3LiZF$97EAXPE82QX%%G{G3iMT8JYddbgn!9Z@nU@Bbw*7p%Vi|(-zP-oK
z;B)D6o0MSP_?*6@|5RGIO?LH6hdS5s#-y>h%_XzSwEM;7&BaT1q-SZTj<R6PUZ*x#
zI9iMMPpPaHlka}GI*FR_d@6o~IR)q5UPd`|Q<LZ6!h#-$SU{4!?vyj6c`8e&*qE3N
zotNMMCHZ=-aE}mkkx9WOO1KKW9+PsFC|8~>Ymmp{ozboyxE-fiBSlXG+WlKhO7AHd
z8xw_CTx!^0W81fzPAUi(=a2121hO(J3INbcV(n;cjz&L$-JrOuL;`edVhvkNS8bwP
zG6HcO=w};$a<ZF$(#JB%+t6?8Ox<a}VesH&)d<F4<|+{>?%3y(yDuQCc_2dK4L>yG
z8Jv1y2fqAAG-#EX3@67y70b7)UC<Y9{E?F{<p2xt=+ErARoO}l7)+xxNWh{NLrmJ{
zOlP{D=1v#Z(cS7ql^*0}SN^`$3~fjz1kY3fwrJfpKL$SSv=Ws5kOmN0P5ejiKtj6v
z);DS6>42gBpxH~*GHfC~P*bJYEpsAnGvZ>npoiM_37PR~vf^f)G>xPO)l}o18@7?z
z!IycOX00M@pa;*w7c0+0Gx0KlzkuVNrdWpZB}{^Ho>;Ts6%fm3d`7y2Rp{D<9IvqY
zr1V;goN_DA>QricB5IjIA)ifg$B5WZIZ--yZYdX1RB3;%b1^&)-Msl8Z9`UtCewTG
zD@RU^Ije&iL$kpQU8N_*;kurCl&m&%&`U?GFG@#4z<kRxWzil!h=(srM8~Uhrd$7q
zqSaKd#$j5{*YoKx&o^*{3DhuXf~IH6q&Ssv0^u_{FmtWHC7X#rK%mz)QE0wQy#jra
z-8|K(?fHi$H^xrxeUXsJe0Ti;r$JU8#-JqrTqk%9KI3FhBT3uTjS=l+IF$(^$1_MN
zywG0BhU4%xP4Q4qW}Z$-<HfImIY~C};&UOO4&wHll+9bF@(`ntDI3YFneYr9{Yf-Y
z6&kf9DMkP@N%}!WJaVux(Mnx89+8c+P4+6s*u(_Wn9RDb+BnsZv{Y3uR99%#ew3{-
z-uh0)^FwHNIq519HR*eAzUJwz!R;_X>m%!nj`Y`mZn;Qmf&d4;BT;}^qr;E7o^lL>
z?w~Zk9s!WL_(Ydi*I|-59G?#IVEgZ`_nWV~t~@aFnk#EtNRC2CH32|IVL;Nz!!~xx
z8;`*DxR}dABJk(jj^a1qL*kb&t#3qRyfxsvz3c9IGWJ-17D#~oQ2G}*fzEbGSRus7
zOlL)N6+r^xO>Ua^!GM0tqm4x$eADjD+o<w(Z0K(2a;&J!W-c-A#`SA=5n2V}5s<Ql
ze3WT@a4qp}UA#uJ-Ssqwn}^gc6NP9!hor9}5mI0bYDp3@KO=_CvyEU5N6@|Qx|T?R
zll7p}VuBS4xBQTg7$(DC&>nH=?SM_>@i+}X9-TC<IRi66!%BY-P_HJQV4*h79i?T5
z(UHCeK&*+0L8?EbEYHEK{Jpho?0X9w*zSQS08b{e|9$#@=j8tjIq`l})w9C<!u=PH
zt(3m7^u`TNo#B&n<Usj<48$^uZTtj_tPLF@OZ1*V5+TZ|W#*H~d`}pr`i(iK(6!3s
z!N3A!FOtf^NkLbp7oUAbOgTcR`3fGW()qU_wgcjm&3_Qf;0_#lObR`Sx<SCBN1wRZ
zh*zOW#wCv-TDOuO{#I_9Fh_C)PQ?JSz0GkNzI+rhA4x~6CWufh?-v#Kw4VX&x%K#<
za#m1ONxU3;8!rgy1Oj_L^_nDw2IJz5JAD*XFOfTJmC*o^4Nqz?YztoR9hf4p9jOr*
z?=wRd3~)2!iwDc?b@=(-J{#Y>f^_)z)ZC-<Ug=xskQ~ya)9-;0Uk1;ok<`@Y+x_xN
z%I*=73PrBH_hc&Bd>}Sjmow!VsdmouzWVKmCzWUzP*gdsvxD{7^~0l~TkDJW#0l!<
zk#<PqD}Q@=@NY=XH86IQ{hntKgbpFIxTQ7-IaCWZJJkz6$I-9Z4D+Bvtuxmv3Tu2F
zLYX%Uu0tjP4>P_oGBcz^2#bNA9;$KwWen#Bw^gf4q6RlzKJF4droI5{)i}PAXwbK$
zmQov7G^Y$yF1-KGFF)Lzvzs!oj_&II^qd4g0kl-0mD<MFN4p+x^QJsAbZ=pO|IGp$
ztjeK}Q-KV~+Tr;K9@645;GBE)?o^V0wC}%{zkPL_wEwhaJM9h3AdSCos*5AzlnDX7
z`=sO1H-8ud{Mt(ZDH0L7J$4OW;^wYA^Zo-9_wCd9Z324!O?l$7wZvQH?{mK<ym#1Z
ziVbYdLuo$(Y4=;sJJgk>cTUaGLx(RzkCmM$kRA@F@@owA-xqtbbIocXqdqrr<aC_-
z{>vaZhvFInkM6&`bg003fG;T^Dw6&kBMA+G7W}t%Yw}WnZpAwN#M+mEMBQ~ejY6CI
z4GOaBdz0)5pI>x)!2l*IvS1VB)BPoPM67#xa->DIxuP<NXb(OT7_ham$v=`78mzp&
zSWokA{e3J2J~TUzg(ZzGY50y921L}gVnRO(ZNn)hcQCrk(ZuSf41?hRg|q4lTg<Qn
z;UDJ$Ll?O)3Xey_-w9B_7%CA)IL~RWhgXreW!N6#$Cwq|-zJ{u%MQqUv?E$b{Xq$q
zJ>AJzHxnm0C7S*zf_sYR2hZnRRo48dIHc3%M_~-A+zh1qV?ZDKUh+fxU6C=C;_<My
z$s1#(z;G3?#RRiXU+hY7dl&S0`;zvj`&q|%^;K|E(RjS_8hi>!<9>jM*FJf%h<5)w
z#N?1=eaC6LT$Afgp^U48`xB`qidotuV;<Om?V`g!j0b9;t?EY^h-TSGTaEIE7yxX0
zWG7M(btCrGoBMlO<TW3hYpyR+sgSYZ`P3c)5Fm#_FpJ}EZ-rOI^p8&lTv1u)seVH3
zr_OBFb;=?F{HGvH6z1|;5xLu|j}|)Ho-cg;v^e}J=7(`}xm50MWqIb~<E#6-bTE^E
z>5i2>?t0jwdBmluA`G&LkQs3zJ<g-9j^n*6r*xc~lak+Jny-%Mc4;2-SIx5oD44Dx
z+C7B^U3x}FaZ*e@EG*2Z(i@-5je}}`9rhOkYAs<+M+%SZ^&X#y9Zb%E*Ma2wD~&9-
zB%I%i)UgVC+n0@62%dlMi21XpUiUfzR%K7Wbo$=>%2aZJ-ImdN?c-i2;fKs1Y)XRN
z!+850m-&rYWT>X_|1|Ib<TIc@L<0F3@&X6EAVBuj?BEM{1o9x`46Vfed(tPvb^^qy
zYFsd3e}<roTkNo0(KiH+AS(bh0uTc=`#R8N(`;t|thKeD?Z}-OfTRLep94I0!PWhR
zhxTU?Oq>@WyUZT(O8f7+p9+br(uQ17W5`#V5Z~S$*o^`_tg_A+Eze!`?BYo-2Jq5!
z04V={mvR&j!nSc8h+YKUM*%m_2Ml(hkSEZfD%yVrft3M6m^?6_yMA*7&YcL+pO0nV
zh&<x5A8oAbxSQwu!>UA8-)$y8ViCY-T!7GaitQ|Mwk}#~Q%(YqX|83DKpmt{CeJop
zKbQ;e85nzkbI8WtIYIqFk&XK)7E%oA06;o|lbBP{Oa#$GIGxC1x`Cv7wG=3RO76Fi
zu(}W8#&ybE^MP(v^i(?IYcs>ujj5cJ%~D8n)yXbhQ0ggl!A3xi#T!v;zF7B#mu}^_
ztL-iP4hY{o!tS}bIv35M`lc4B2n2<XfP27?yfcmi$rS-g$m>9vWYqZjq8(t*MF*#V
zqeybTk`P_Ye{OG&AWK_-$jQ$ulYixe-6!)14tCR71J8A1Uv@@}8EGCc<+S6b1}Z8j
z7=;w+c<!=M!oPmzCQk$@>xE9b&6Tb8y+Qm7w#hzs#@7G>JL1b^W#h&4$R=4t++rr{
z)6Xy<&o%f4TsvbJ`&VwwM^DQaVi!an^`5x|G)aC(ss3&We4<iBsx@L!f2H~Ye)v-?
z0I5q(MeJ;M-LsamKsji2eYjse+$1{SfJ*C3(^S}4nh3X>S!|Z(N1(pI`k&M{1=SA1
zk$avpJ20(`;OyGf5%L-{z7m9y>sG3Il^$Q3kM?;aTRG#Tn2iiV@!Z8EE;Qe!Z2~xs
z2$0Tel0zy}uH$hpjNqL%@M3<#A=D3jUj4<6c7Snic){v9&c`saFZ_tKgTfhIJtBB{
zUt%ea)lLqopp?sYcQ#RhcV*A$jixH-2;u<cIP7auOk@Whc<_mbV$-EI#uEfwmrNM|
zIs%TTYRV4|#CJ{UUi;ofXiyPedd}5ipdvsRh@GQ-fnlnLG7YG7Ib3rjW($V`QRY&J
z+}wr>Y{Z}np>Wn}TFSdH$Qmu)One??b{Wb>2w4A=6b%V5N=x>A4#3Gs5a0UzZhh`p
zkCh$<H*}1oOB-byuv49Z3?i|Nqv!ta`Tu+AaKLWoy@Fe4EzGIL6G5rD$&4H)F94$6
zC(}!DbDn7Xf@Llqcs=_fc{8zW{xkXP1S8i3@rF2!CG5yqyPCi!f@PzY%EHjNwlm$2
zj}L1!uJTZ}35m6Pe}FwRdkhDOO-iQdWKW_iHHcqpGyeIJGOamHs68QgezGfz8!)Sv
zc&R1d(o644EzG_plG>GT-+|a}Y~J3_3AC9p6PX#1kF-g_K>6hAsSuplCS*JfSCO!)
zT;d19fQ2#X-JY0!$X_bJ)=#SNZJQ_g+A`KZKeCPIgZthlzO?g^7_}j+O8Eh{6muM`
zW!A)<tBw=HkdV{={>G|vyZv%QTq_X@0eMJd1Khpn)cc=mt3l+_4qX^NHB}`g@eG>(
z_o;laE+%s(Q2d87%7s_I^gHG2s|p|%BT3QvVSwH*#+GZ4po(oPa9Zsx%E8zsvG(BO
z(|tvnTA6o4O>9Km*KAg~qY-w0a{f|v*Oh5bP>=y!V(la7-L)F&zOiwjzhvP-Qn|+U
zAN_hk{A8|l@zv%vwZ94(On^DW1em)pBnwjrzp~+15TT0mG*Q_THTXiAhnd9*RiZs7
zGjlrd!#!2r11LrVD4bMCP|iPHQc5>xOU(E>JLYE~OTZS@HnC%b(3ug6^jq`3VL%n5
zdJ)|6k!#Vu`Yt1pv&XssE!F@inplBvQ`sfB?f6kZ|5CVO&ma1G$Whla!YP(%JCglZ
zuZH-_Xpx{c@~_A6Ms~$1Q!kT8nt7aLYcnw;O?LB`ctNz3fXApm!6O8!#~=}A23vCH
zjn&QoXlv2$ZTz<;eK+V0+-;R?lJm#o41RX>pwg(yqKR!b&fd@qOH)|WP~9%`H3DvQ
zJ3?$rW8OThu`j5N8^G+wgYfg&r*7E#z+ctcP?J0&dJF4_!;o@&>HO{V2uP&6;;<|H
z|2&mGnH0FI_!_R&lueknMF-9ipptwT)~aosrIQ~P$$clePeC>p;e+X~^@0Spcx6H1
zj<y8QU$fo(-I?0q$1r7E3k~oRJz{rS4wP>Px(a=0li_ji>P8MSU?`CuVriCI#W1x^
z#69HEE&L$CtrG1@$FJYlPpDd5l6NF)s^Eu}#L(ErNOQ?-u`-mJI1gN=MG_p_3nt@F
zi7w*XUHo_6`oMX6NHtz=Uwdt_Bj>GT5NNB2Uo#H;`Jnq6fI_7Yt-TRh2kNru{_V+!
zP@rVC?c1@lN{KGbPf4i!afZrMI%P7em)d(wfSWqd)vp*`G5a+6?QV@-AglDxeofU?
zvv#!U$$#hO02mcpKkt8gvA=C=2pCVohV05>ceJ6~5e#lnaVhmRET#}lkVS!F`6hQ#
z{Glo;<WXPYO-NE?{d3weE!neDrULIxmt9NaPxWf4GW}?`$*^1hmW7QJB>c*_z{9_{
ztJC-~=*0WMd^8;`D$$2I&Yryv+#rcwyF0TcOORQj)Fa)HAQ0p=4S8EU;th{LA0O>b
z$NqL8*7G4a#9l74MbKaU2ohapkKNSsulM<D^^p>RE0Bj&&m?evWRK6=z^MoX>6(2x
zs`^en##U!R5oIwEO_b{tU%agFjR5xisxcf`v8Y=c?vzrH&Nrh?DS6^xCn&;gO*{Up
z3(Nlc;M0&ciMQV$YkJ$w>|siE(d6x+X=B5}f756TgNzamwEJs*+<XsJR2Kf!u$CA+
z?nnTl^|n$M`&QK{aDeXZbL7Vbgb1|*r%u1$!sTP6v5s_%bl4{M4nyB32jf~pJntz%
z#&`Fddd<-S^0+#LSFBy;$zup7V{)XT)Im4kB@Bm*%{TIcX(2lbAN^*DeHOAvy)ReL
z0iP4F<`0O5?f(9yw$vLZ-p(&4@PlX31oQHsWwyZ>kVxkh$-R3aTO~nHuL=45I8>oc
z<h)+REP48$>gNHQ0sFx$V$Z*3SmDit>CE>!8OTQ9lf(|rfycI7|AgEdK8dgTo(O5$
zXArsm<%|IpaAmTf(cj*D(1QW59MO<Q-tk5#e6$L93*yOe4;F{a55`bQ2-Y-Ji_4k)
z4+DJNodm4u?+(6uefU4Est@2iZLBZ&swGsVZRx4Ie?-AJAA{@ipTj@~1jrklbJ}=s
zaUuKbC{yVK?fj5X+}@jEEx0Ia_Y{2h2hV>~aK_(f@bIa?beEJF3*BD^7}Gp6utpiS
zGYI@0_jlwf<VKRlx~Joz{-7ZD54wABa)HlW_+V}Z5e0DILcs$)j8{$w1G5yD-Sabm
z;QWAfvz%jF-m}f_A}&Sa9}l>pWS@w1_Q5kml3?U7;>9TJ-$THMQi*u*y;V`Qf2G8*
z1lpNlPW0THOm6sQrsX}$YfvL~w5Id#Gv3}LF!BF<G%!8_;Ks)s`Y=JU?<Iiq9&RVt
zvZo!vh`bmg^}{0{1!S2*g}$vq#)Dln50lzR=)m$<s9<gfkb#NBoZ7c9z5l1(o{0!j
z?xT;isy1|QS?~OP29JOR?m7RQKrlY?K?!7F|0^_X|Gz9WPflR;bbmj%<hf9o*;gd+
znjiMog-u-Q!QL`QZp1$n{_M$I!C@m~8z<u;?vVy3gL-~9uh4oLt`zFB?%z^r8j1dS
z=uUah*(Ge>R#nr&uU@L8kwd&mLtmMbbbagu-1;pUDlAJadn-6ciWTRgi_cjP%opTk
zwBE=?HO%gqcK_0A^YC;==RMHS?fo?W>Ur*V@J!oAbyrG%j{HZ)-1zFlW|tF++;eBc
z3njUe-$K0XOU>7RTLJNA>ZdNM?CGpZa_FbXpN*Go<tA?3g;j2pM!%Au&q}M6n7z5h
zIZ1?KEeO56niclLq@ROn)%rN)yCba9Jd2`@(HRW}yfrsi!ZrA#l_qYUS$$hz-fLDC
z^V6<4FTG8P&Qg~x{EkXg-zB?-1V`3LY1*A|U6FNxb<fS!ixuPs`I|~kCwqSp`}|U<
z(mj;4OS?T1=~cl#wUQaIe(B->Y;lvTR{M?u*pLUYr+wcGz5;R^mg--rT^NXNpdHB$
zdAW-|&phU!A}W?~J3eZus5CgjBPnxhrN(+;+<+xAG)TTUPUk#Pz_919Ds9tm-Hn}s
zBVAP1suLwRL$|V?DS?HOP=WhBL@v2I;dTRGSFgwEJ~x_qm)|yH&~MZlbxmNh_ohaU
zgOq4(hIVPPI~1(Y&bNrn%j)VX>sFR7J=5&9S!!zIbU&2bc<fiqY-W*h1+RV0R?Q!2
zBdrYWHb?zCX?IiOC2+f2JFG<opYwC)YA-Y7n);O%ZK<y6%jPg}Wn~mZS>kf8-J`GD
zj_v2)44w_1D^aqcGf<oLJvhP6ASxZdR+3;(9r<(+Xwzc)H5XZ27B@uuM_#Jw{fQ_M
z!j1=AYp2Amoa`C$)kr938hN)Uj};yMsk2&t&02SO_8Q&9_UETqr_LXB5={e_c%LcH
z*mPfWe#tkYTjG|*)M4fwHM~_k*h$`JaSt<a)pKozq{rj&C#J^}abpvnW@E8#<-;dK
zp#`BcTUI}ub4M!a%sf5q(Z%NJk|pEX+FhRLybT5JG<PdwhMzWtmePfKCYkR}ZS<W!
z_e$|)v{G#)H;GaO_eR|q)8%N(p4{uzi;Ts}5}g;aNvgDUFBLHyoF5^ipjze5FCv~r
zurD;ojBeYA?5?b9l-+gYTvTG1ur;F+<vHBMqHK|7a(<i(os(GFHcVTTU!Nsly)9`b
zkpJ?YZQg3TMM>8L*TZb4(@n{u;mw}wJWbAnSdrM5I2rB>#on9tdJoc-Wrg!5-LIoW
ziMY4z%$!AZHF6RodRYpG@fm&?W#010N_Q(KHiP=r<3ArR1cfJQCz;sIY<9mpiqTu1
zy}i&mn$!1Qg%Ep{|9IHQDql3F$6l%I`f}gYIiVOfRW1=**`h)gB9!gOu*tzDlwn8Q
zV{wgw`v#Qa*$~<)?e!-kBi*=);<xitnY?;VsWl4HUS~v`N;#XPaB!LRXmYsz&<AtK
zXu6`NSo!R--->5k+|Rq~^6P6|!6WEv&58rhJ=ADgY%Fm6_KDWp<+)QWd_@cW-2Jfz
z`YU$pqd4onq%dAS(^K>qH=mBj3BH@yA*tY!)yY+3X|g7hcC>Adh{x?C%+hCqQbe4Z
zr*7v&mn<!OV-T%B_sP?)HxO%C9j1D<>h0`E!AKl0Co3N*zlLmnAz$|yu^}&#{gJ>@
zO~KGIEf@~%9d~P2*tnlyTf$^1qa3jAQsNzxgSc;PXJ3K|zFdRv*}s_~kzu)AzHGNT
zU843QQ~J&3bN<eb!AI4EOv5i`q!s%=O(o}LJ<NKwpo8u1lTnU-uCK9hEbT(}lg-7J
zW(SU*dHL!jhn+S~!73p!riCWziQU_~f={remll;`K1~*`wlF*t&pbA*aDWA{&u)#7
zM>UEmbBo>c7%+F<+Gr(`a_C9nvz@_yP;)mBeu7<dwG%zC%-HZjTaw9)9^GHLM7mo4
zLCYo&i@qA&wch8bXSeirJ8x%mdNlz7%`8(gHpiEjWz_S&po9B|QNs1DmFCms29r7F
zzU8&+pyo&VBj=Qo?76R3Y-}6-sCs0#lQC9O_0IGB;0UWeos>d!)T9Cdmul)T?n#5C
zP4fM2?;PUVy2S8>?@7URr3PblgVw|-E-|+$-%p7*pUcmxi1aHz8oW(tJQrG~vAsO`
z$HSHk_j{_Y|6nnEh``i1i-%U&cff>nR%wruN2Ntyqieja?m4$m(l~IzU`ZBFJUqQA
zQ)IzDC3a+#%F1c=N_*0qv|yIl)sUk>@|cf}3zn<x^W0l4i;j!CZ~03vjRY?@ymt1m
zi{|MmkXssCw0e7Px&F)?f3h;4BYJc=GTd0cfx*g#PUoA;+Q;gJB~i5!ul%%L`Q)EM
zo&7`8ohCU-PZubM*XoAChA<~*N${**+de&c+4HWJ!`)xA(a8q=9SoM2e8+5J69j)O
z6@TVG9(8P8sTR{^-)FznqW#!wbJ%)T-_%wLqU7~;P5*`~hYa^B8ufR$N^IOF7<6DJ
zKAMlA^K9tb6>IT?Ta-(O$M~jw&01MD0+pU~*9A*>22RrH7|c!=Jxo7UO#<l6PI|t7
zzVrX#K_t>*Gp*7#WMaN-gV6jONJ*tQoc&We@>P3M;)$57vh=4<5+YGN%&P<a{TSkq
zqaIYLdsiklZxAf7hX2<-M-=}5+UL~&d#U^XmrI?^Q^8l2WM@>bAMSh2aN)pSF+_r=
zP5lW@`$~a_=7gBd`+K&)1jU<y`RhnQ44Eeg5a3I;pvnGol7LuRBz;%&FV03PcNQL?
zWupHZQIQb>MbVmSOL@?cxAz^yg0EZD-Twjs^1=VQJjzr)0BscTR$*fQ%=!*46q9>3
zsq_c8SkeNxxqR4c9^b#YC?XY5t`@nR_}73;AT)51!0yuDzZmI*;kl^w-@cSG80@Ol
zdxF1RI1OERttF_w|0_6t_aPIfpCTiMUyneKBsSzC_j`nB=Q?Pjl<e>CAs4{bdv<e?
z?cjYpA2HD6U-3il{f!*RjDl4mF!n|JKVL9`C(x}V>HAHvQNss)Oa-6*ix#>=r$*mu
z3H<dGH6qdX1HjYX&B6wv(Se_kz&G-VV=zro7tC3n9jI$IY7*#`7@HmW-(J;XV4_Ab
zV&n%CDN{)S;}YDEq22El(N&ly>PIFa2c7h`hF-h3D-rMezr5F>kE5%8hyS@+_TZ_1
zL=Fb|8L<lGo1Pw=XZS?F5i}Z!oYjE`3=Prm1`G}AU$~9*HG;R2XHETkE06&UNK{0`
z8A4(T(l3u0{(6+4L*D(+xYKIz<bLBHlnlY^h{W;8nk2F5kNvOhtrYTu6dC8B4$gc3
z$@v?G^Mh4()88+);sp`@8#^980v1!5qh8+c%iw7ra6aomePedU_qKmWgv_O3G04gU
zbNt^pmiw6tMzz$J9L7UTy}um{3+dkk^e@@jZ)1P(^P!c3&61}9<a?a5MeIKlLVW>o
zs4X42`4{GJ`PgHmPz`Leczi~Ze<F-WkPLB1^v_e1e<KkJ1c+xFfkX(&4DE}|zGynY
z61GdodF-#*nxf%&OB}GF-Ee`JoQDwCe_WSrgLmN33a2RB{Q(=kuY|y%5pXa5e`>k<
z1ywWv!2^i@<EVy-`JaFS{Dwoo%y$lQ&7Bl3KKMe~1Cc~i@8lY&!<+<FgrkB(7XBkO
zTY$(L?Zyf4?Mo&FwV3Nt>TpA}8Wb0q^}KEmxe&Qfh9?c!ptiQ>yrP$<x&z)-|Mz6S
zgvsV`dj95Mvb{a8*n>QTL228%Sm_F;h!`>3t5BqqU)Gl*RKLu1-87HlUsy$^QVm}B
zoaA`8A7`zU@qzHOGDOn*-}7vp*GTl6?#*wg^yeh|kF@w0!Q}nibCkj#YwdhIw>Rnv
zZHM~!%_pHiJsGeG0*}UUxb}`E%fEX+pak-urPg{|<^xrvGYE`=vzb^V9NB&02T!{L
zW)H%+_s$>yr_e9fB=;S5K5S$ZOd^c{dycl69O(JMqu7ti{I@j^u)Tk`gT{OHxT{wE
z04BiZdiwu!KT?xMA<A;2$@K4LxMldo?`V932>Q{e8Pj92e**Vka0Y4$AB4*vD!I3v
zw2TpVL?}9{ybUAXk@M_~U7lF!7Jk=#1^*{mAlfai!y-&lHuyigs*(w&$-2)4T+Ao9
z)S<N2Iuc_n-<*|5_zbWHB_PnD%);buaa>5=q+8!`41GXSzwyX4aEyllnDquEDt6{<
znCl`XnDh?@3hqYm^SbyoCmfZjl*FgRL0RysMu9~gnnM5C>>{Nwt%%U8IR00LWYggu
zVKebr3o8rhRg7W@{}ciRxG@Dka7KV;{7GiSA=chw3LN!egd|65VF8VxB)3MnyGtuS
zF%+MFn+pY6T8(3XQz!JEby+8KW9rQWqn=#jwh#AZ8KgZ+K{AwqgOLTPFS!IMV&&T_
z_tI`A_y)2_>6mwC&oJYuiCi5V4?SZRGUk>xv{gU4^_w)R5=U@v*#8xKzr5g&3wL8s
zd4ES|7!M2nQZJ4Jz)LDJPZ6o8KNst6Cr=5<x?vDwDyiy%aOL<;DHP2@3i-M~arpig
z3joMi;fX*+9<;b)`ewB6Lm=h<P`XghjZ|fEfik_uRZk8re5E%iTs7^<O&b4cep=yG
zgnhHr9iT26=~;96z(7!3(GyZ|FKLU<<RrQ^^o!QVv~jKpi6n~6w`ZebQvNAuGnoB9
z4-kELpMVj=tDSpBH5@P?N+1-PPq47xiv^pYZszn(8Kl=C1ZRcalXnP#fl<R*>kaXJ
zh!d}>W$5ImkC!<#3sLq!sP>n>plJ)+1mGv82PzB>A3rZ+K)_SEV^a0#^5)w^pH>4;
z{@r$s$iiXyZ#@SYATzZoK(yLej598Ekl$-P0IEw>Q{uR^GS34(*{&Ha;hYUMuR69p
zAbzQl-G279cmP5mwJ@F#T%UM79AOupZ4L%jX~0-SAiKBHhp=DzEfj8cnC<JK>=b%G
zM4FAa*Mn(my1t)+2LYK2z|7g$SgsCWtjA|U(0k4dsYZ?)m~pN3oy|FA2sj%9@60R@
zwxtycnj&>dhoKBvTbY|P5HGz#01GLHP*CeS5)=^-hICldur}<Na5$Z{75-SE{X5g;
z*ugdoVSppM@x1L3Kp@Jqy%|)AcA9kDu=R%W;>mZ-DxHAbDkA9(7z!>y4;asHzWw?I
zwf%M4+QfsL;o#U`Q-R*1D2>Gchxt}Us0z(jl6mh__GXgG_IVj7Yu~SJDn6uAQJmJm
z?-&f8hVQO45g9!LJm+IB+hhKZfg^S9`hKc|PwO$;dxK9MNs-FYe5$JxY0>7BFE{YL
zL73|5Be^xRk-F!b`KtZ&s<L?y)sFpmOVB;47c#cEpj>u^<&OeYrAV%ks&SV@G6LxC
zRr=C2pY5hn+p)BRZ7w4Zin5dyewD!x!4^Md(VZO$g{gs|t1}Upcx;OWKau$(%ahb@
z7976a^rYWQelOSol4$}miteFL-@EjZzDJ(=FmYej#0zV<8rymh+dxpGCM(a=_s$H0
z^B{5fmK(dijUah5QvbqdSg;BjXQ?{(HUN>L_jVIO$Mh{(V=wIB%Dd2mZ}lMGT75CO
z|1EapTV@X8)kgob#UglWbj7!R|EV*`Q|3;L$hZExU;lZk@|X{twaVGLMZ*|-+{eTR
z^+GqH11i%>56mBUVZVqi9zU=!*$`-eX-6S3J`n-84H!4;0Fz3BKr{dn%s^iSL~!4^
z0er6PfcS7T)I0Hb8YZ5uBreO@&`a>1ghKz{iKm8i2rQce`XalxOdk;dO*nwEH56XD
zH(NZ#_XJy5N4zNyZ%*%EEB?#qmZ`J^MUUN-bp!+HH7K$Blz*;YnzkMdmzTdpu_799
z>{Y}5*$jcj|2T2H^$1u!*WxG`1mvQ4wAmr*9|bT(<Wv}eCc{CVlD1qR06h$3f=(c0
z>)cwIAwVouq%5}V9urC@{?lvM!)1LW+8CfScCgW)II><MK$&KP4O~WOwTm6nbiKBh
z7pAezN;&#viI091IK|z3q=Jl{Cbp9vTY>JJNqGEs5=kNasg3uhk@~0%+adw0`@9d+
zD+KOfRWw4UU0{ip++J#5XpyC=Lo7M45Tc>NVT06a-)S(0qM<lSVHKK%S0^t7<Xi^;
za>CkxTLTP`JYV4;f#?ahwO=p#4OQll6VU%M>RTYgELO;f>EeTDZovRhD?EkqwK!RI
z)DQ$~NqkumwqRiyYWUz__z&DqF~r+yBlfhy5$$sKBBO?=CXdOXU}?T;UrPH`!FFk7
zi(o;@UWA8ub&J(&>n0pRyii**5?GI0V|<*x(%z{BSV><j(BHLDep^z;47e|<D%k+o
zOvntNTTkTcAF4nnAc^IdDl;r#kc8U5lT&~j?iAXbIIuwn$Pru^l!rer<+>CCBz!4>
z&Omd~mTRo6{lU;jqM9db(DL&$vUp|D<?s9T3L9h0l6`v4M(eUe0lK7)j63`z?*QY3
z&eSc^0F<Z36D(kQlptBqW3CZTjUFb#u;S>Rs0q6MU7-sGpn<knZk~b|ilQMTY$GSN
zbI<U@V5z8u4gkU_14aQk!DrhVupYPV0XL!n)Xt(}CFAh{!*49ILoIU3x?-Nwd5%vD
zQ;s{RVF6E4w=(mCMo5F@VHhY1bVtyauFpFrFPBHn{^_FsTv^vNWTMr9WVydTz!GYo
zaX#!|FomPE(I#yazzoAHKdhZ|$f$4#7?GCi7E)WU9yrl7X2FuMuE)hl^!Gd}u0hJH
z>2uCx5r2r2nr~w|wfS~EXBEwSbn~sTmjB|xHo>2R+fylatPXb6`^wY7lD~;{8J`JT
z_1*Pc`$ghR=tbGS)bJWOj3eP3@7}&E0|#3FQPT@y`Zg<rPpSNUQM;>NyXTN|2C!Fk
zfXYfzi@n?iwHN_N2Rqbf?uw)m7eHsG0b;ro)2990?2su;H<g*&T-A=r%F3(yzZN>1
z80f943E|V%@O)HA0UE3(?l3;+`A5J3Ibi$O3|4vDhXBPH;XL5cy`7%yiLXg`ber0n
zZ>;0A(2ym3f1#3)3Zdifvw%uLv36?PTq&~w$*iO;!BnNoUH5o88|Az##ji!LO_n4I
zGElKdzBcjxwqqjVD72HGTcN$BT6Fs0NTjv~1@$<<P8v+4=ial<1)kfkX}8kCN>a2Z
zWu@^#M6LB#a~e->9K|b(^+XGnxtWY!#ypJn_DphIp6s-0Wh|Xs(4|zB_2J%p+tb<n
zSPPEcKOMvW>`R$0fIS$;@H!(lbKtQS<2W!3h0>GRjwk$@g#1hm-*ha+GI=^<GIWgk
zr*~dW?f*P5N?_ThC+R!)JRo<^gtP`?k9}=WdMY`f%sh5SWQ<<n1JU;Zp1a#7|E#_o
zSdOx?gna|!=^=+;EHGKuK{>|ZwLe2`MfNC!vs&ZwF*q=YMfvfQz)BzpaUuFQCI9nc
zuR;E-JRL25fjp)Ldgg(G#{OHA#6n{pQV9?CwiG!LP}!aemRh-6Pl4??3;OVoYGeE{
zP(A^MMZ5<Mgd73^(Dtt9-4}>jMKEX+WoE6lJ|sdGy9C*b)gP;#$8&fVHjCFf%CLiU
zMe$D+KeZYU*~`xUgUYalASfJd`Gz1c+^q=!(Gv;Gw#gEi_pFMpncy&-6ESz7sB;nE
zL9e0MN5Go1mJA&8B484+v>A9h3)JKm$AY(;igz><b%r#9#qt!4sx?A6sa|dsh_5%t
z{@~E~(2-`p^PP7gHdyZR_}SduPivp1-|(?lcyi}@)}dFsS~4p%yKjwQr1d!B$IN+$
zti&C&eKiTa=*9vP*rmu7=sqlO-!$qMWuJ3=Wu+?>V-d_s$IyR%eZ#&XK(bgS@$T7B
zhG73ZMm~Y`xW&Mn_!6(R+i(4|!pi3&LtRx5fAtic{m)e|1j!K9u5W09(W()|$z%e3
zfBdPtlL;nGZ`wePA_k!tu9HTBJmqR0oF5Hf=Ce<AWrdT`a1_4%X1}I+px!v44C3ZR
z?>vIM$J5qb+vK4FxT)o;A@(A2KixK^;m;Of)yY1$!=8SOlep%z!=5{MHq<^h;8wXt
z;!YX+?y7(7CRNRVSI3pmW&>(;=UAhgGlolJCv3R4_Z4l?hR<*hs(&#?eDxbXyXcbI
zjtA8_O@AYk-f(VAi&bdt@T=CKj8*5lsvf@#IiX57BTu`>$yWwG+p_C;p#$XJIqmq5
zfTF&;R7iJlH&!f#Q}lO}3p)YnT`uawIK&Omt>>&)oIo103Ye<~NWp$>_j9gdcb3=+
zTmg~5DBP!Us2&$61BZ8?w-8`YXQw}xdu+a=d;j&J7o>C_j-+F_HFN9Q9dQ@S5;FE(
zMos~&f}Kslqyc-*fYT)AsN1z8v&SaJ8WZcs3sGT;%N71`FiM6`>5ctl{I+akqPu-T
z&uAxgIKwfyE6h1ewnX|9U90vZR;boJb@c^K#LX!~Q?q$^>S`5w$32W@nAcTsT_r|X
zt3Yc(I+j<AdxFSj=G(bu(sr8<8r#F-bfr9Y-Et`HBtd&sR6&en=*i<a55A#4zE7~9
zW^cSR)2d8cpK+6mx^cZcyKSWIp~tpU=v)jnZ|~FXrufJq@dKmnzi!8O&r@62B+F*Q
z%}8PaD3!I(%haK10o6txV*u`iOP*VWn;*Sv+?IntUzr_3F{(>_(i^U@zou`eMQdSG
zP=&rOJaPr~4UH8qlot%`N{AU8^9U84RrC__a_dK<N2jauN6AA~cW>xx74@!<(Z3ZF
z%w2iuzHDxHA*DtU<@<x4-+%fpx80ZpdxiT<<C7_mjj(SzymBnBM>;h5T^Le`QPBl7
zmp)XZgC=+e+((FyQhwf$*wwMzx-wiU9V;a^HPe@(`v|zY!guwj)z}A0e-Y{3NbNL!
zCFnM7YWMg~Qk;E-_>J?<@1Wq?M6mW#>}TUJFa7U5u9iBVhHam-^0UxrOJ2G(9k61v
zDr#@?)sgAzj!7${-GQrwEkqGCznu9Z4-xeT9k$~bRgBop?BaUu=EzAg0PdcFTC1GO
z!kW!u_rL%Vviz?8&VAuJgXCkI&%DGGC)hxZtE|~S-KYeVZe44_4Obc`q?kjx!CeL1
zk1gMbfyk5b@@|Q=n2EA?Js3$s<>-^<Hc=Jh6KdaA3{aO6X#BT-^H`|Nop7wwUMajg
z{M;hExrISD`jX(sVzCCPc5<A9i;1hU@LO@|60_R03yqYxSqB0!lvAIu-ZfWxdb8g=
zmmd8lvJw}Z-bfKRf?GI#oMCo)LZrFJd72d`99Px~IRe{@UPowK?O$9HoWAT}{#ul0
z`=W)7R@`h_?3DhaYK*qYuNt<oA-qkDp<SV1$^W}R*%Sm5@@VJE?JPuBBod*JBoB?G
zFlUq9k!S;>_T=FSK53vTsBaV`XE=X}hJc|-jC;wAWW5*NdV5;NQt(w9IZl4})WXaU
z9LGA2<pCpR;swFghU2SN*WHG8=1-ZmB>wu|Hx~0@;^lCxkef#0^YG=#%&ZixcbA8z
zpP$CHwH&S6e3L|vF`L&)3BnS!^#rH}tJ{ba$JNr{NGKg-cja-Pj<1Nt4b-c}1g*tv
z8t2{8L}lbVM;T8Z>zGS9JFw`2Vo39}WCI%_5c|e8603%05htG;d(NICu6(^+LP(5T
zuYdinkD$aXSIv#77%9c!@ZFiB<Zh`AuN&0bJR2T3IQwXJ7i<n6oMeCRCUKD3ec=ws
zP!O|()?~M|eSO_?dj0djwysQuTRKe;mxS)Y1jTdRN*$ba(fat{nmx&NW80%GR0M&n
zbC#7OC&e2w5o0PNdXZCk%n#04*Vw!#*b4D0cN$RFeio_q+3;ma;RFOL$70S#WQ@k>
z;L<i@ZaY3ny?mo9CGHKeeX*IRX>vCTCn$IrH*0e~yLh*e0mb-=^6u#{sT9SVV+Q1U
z8gClb?^gCa5!7={^LgtzD6`5>-Xzmr)&^S2*J$_MFZbI^UZu8|ui)WP-@Pt(<yOO;
zPFcM+iO<qU-mgxL@i6g1_C3=tclA?Ck?PxcRkM`r{06ZMUj>`*dtt}>3J)J>?0E!#
zsqFC%fR|ciUmgYvW)Q1<yh--V<Sp2+wR%xZhZ~nn6oSuug^Q}XO)}F&GfPOR7uxsw
z@(xF?aZzo^?xPQ^k8GTRX#(~7G={M#xq|KCXCBsPn@3%z_|B=@dFB}W=|JCg{NaDg
zVj4q#7>7Cgu9&`84z-&%&;MYO#q86i&F3)%HE8TD&8OJ6G3ZCdDvQssUf+jsmVDcl
zRWJw4yMqSdqC4F^4jV@aw=Y~-(`ahKy&xWyoZ-3XD6*C_;hr$0I_9E)$??=5X|!_d
zCi|l?=S;_fnb?-y%zflckk>+T!htNq^S-9E9d#2Jxc?o48bqq)eA<y@hAb=yU}00W
zS)z*XR}-VbH)LkeN)d@&i`_I*{E>BW&N_K@YNL6$h09@_p5O7(Zy~*fwF=ew*dI})
zy0-Dyyo+J7vV|nlm-DrEW`o!tz6~%v;n@F>{gas6b7P5f>kW}#o9Wj(w7;bpw7J*z
zr0%k#M527PS$DRf@FvslRG<LmJ0ZDtDV*!2&78=R@94UMM(xkjjW+GUR6-6SBi&Rd
zmU!G<*P@C&DZlrMW1ED8B%Nx}`B4pM<1u&4-~j6xuaj&J;|c^z4@o!jRVc@WQppcJ
z{<Ye{Z^9zYBT(F$Cf#{9Ir@{@k->7?9hdilKd<BF{;&3~{T=Fci@TO|(8ZQpQgR|j
zg_>~*QG{~84MG>WHkjm+YLuN+Qm&JR8I!O(g=#8t8|`x_h8c>a$h2L@9^=wRlAQG&
znd&*uKF|IGwjVqm&oke7-|xHL^{(~#tk1jF80<QvzjT(gC%=CCV%8I11_8{wPai-F
z3VJQs9FA_4;`ZPi$5_C&`m?-n)==gwCza_$+v~S)8WjN{qe_2JQic}cO|<05?*+m)
z60N;*PdEQ6bD+^9@8{Jy2BiKXlgOZ3i7`-#;d`;K>31%Api58Bs?Ihy`;eUn@fFoq
zvRxtE(<=M2-F(Zg6N4c)L0*N4YRf)YNc9bzA|*z8UeGa+qO2rlpNy3AK0O$6Lvd=)
z@q2Wvez14KBH;C1S%UUT%-d(e(5XWWa4K~S?G^=l;3$aJ`tCndNe2;Ef$?S^0L}8F
z=J5U}p_C+FXgtPlNJa5y&Rw1Eb=*w>kGnckY?y0MTWSnOXn$NLGIFy0gVrKNcVQ1&
zVo&5;T_(F3HBon+wsY<eN{AWyKiPHYw=)_ebW6*qV?&{UdoZuu5hQe_03}uNgjoE_
z?6CqfXi?`6bPMQ@u9nZ{WNP^Az(GzTSr~VZLq2$0v^XYZuxo;eXO%QCwA6B4vwYSX
zUbYJ$afp`rJHfGe8k8)ML6E3afxPF#D@xHqk&=rah`u1mTxtNfHYgj%IN<30!ku$7
zYw=IXOH9^o`*$OPW5t*XMrH&f<8qZ;`5*UCb$Ar)ihvhhL0_u&^s%YkPOiLEhJR8Y
z(b*P*l9yDkSA|q$8#_XC(XX=a!v=aL#YU<{&F+c-TX*fOXl^qRlPvq3gf3M!>^n6z
zPOyq<P~X(|i}6Nl%U8CJ<4B7Ck%YHe42M)5!Qw8a4H)oE@a$>{N<@~0BqO4ekxZ+t
z7T*BLY3;Y7;xR<M8|K+y#-GK$RV_Q2h(c?rwDL8;W*8KFS!PJeKLRcmTJN`UKykye
zR0CB6MsftIxv_G2c-w{VHur5u)G--GXXtlh=nrL$9OSOYF9XA>Rb+;L&4O#AVRSVi
znxf`HXZMPcV+RyCZ_7p%h#6aaeQ@RPmnZ;|Bdg({n&wp)$AieCzoe3fpRR0#&Pd+8
zhW$-H0S0`*c<S|GMCSOUa$vKyr=z8$0UdKr9bV?sVYGmun1>8>rnD$o0(7k_<aLiE
zAw<%|$KI>9RPzREeQUl|Khh)w8iXKnp)JtcFom7Mp%@0e*!ToANBzJMtmxiKR+AS2
zr)~Xz5>K`%fOhkf87tSvn~8}~+5G<Q01{?srKhF!)hmT%*qhF|NUInlC*x&yNe~4}
zIQ6KRXphgV)4I1t+%)NT{*^iD92trLYTuWH`v6-R+bv86M3yXh3t|AriZ&b|^baC>
zEn5pnpv@tI@~BnnC15Xnl91?wNJ@F~09B}myt<pJRZ?ya5LqvoFxl?TfL2x;DtcUp
z+1x=2Aq$j+udAmKvTYDlGd_?-4&XF;9tJrpn4Bt)LtUsAIb0EzTI#!I3Qa3J@956_
za@6r<1YkxBEUsTN7|yohJRs7?%vGz|p>RhPl80%6LP#l)<5-3}&}vG6CMyLX?8GHa
z!Ts|LKqYU*jQ@s{C|r*PD#afzi=XI@?rA{U94Cf^(dVXuo_2yHbu-d^VpK1h=dB4E
zDE;8Jy-_1?)T>WUra`9D0IQ}A+$E9lxp$pU%UBZzF|iy~RF%{$3Hw^#MnKa!fd!1)
zz*fo=H-Cx+?#F2dw`0skRCni@&*N6NUGHQm!&R{$Zj|P{zMo$?nysQ3*o|kP9>k0v
zLUg#m!~m9R1As^OanCXwYJP@AI06u<=F?7h-_z>Ar>0-lsd(OPS`7UXADo_zLhlag
zsj1$VgK4dHoA#ImiozdA^0U0u=7a!(9;tFo+=$Wg&{QqMOOZ;=9WIAE`lJQt1R6Of
z%0IY3NVOZziAi3!QGc({r*?+;a=GWfo?qVGf1|?NNk)pLKlFmaN!}{;7$IU2y$@i3
zEGcH8J%yL(NA111i5wn4<-B0FI5oGheBch{C!;m9PRwdRp1b0LlZs~V%uSXOP6SZ<
znYnLQpc<20z>978=|jg1=~78rjdSDP>VKZv-|n01um5#$D>fo%_6gGc4e_-KA{<nE
zn;G}qa-DEihw45H8Y0tT=z2X2d=HXg83~}K2OqLS0f4+2M?9ZGpPw7$r7;lE+wEg;
z^9?Xn>mguH&p?rkmG;mFW?HS*J&F5;(?ggYL8HxPVSjRR3Sc9^O;f))?0|FSTe*jj
z$P%dTP-qde0jhpfO!pI}HbKT@2=|6C(PtfvJqoSP``{nfNMSXdD2#rB_NjX8Fk8DA
zRD$F5GZ-^saIgeA<drtIGOD<0>P-mmOHf5mb|T*J<p^Zd_E??a+*S4|Ztyy8X42*j
z!S3l7lKJ%j$S((ks)ph|iHByE!2k3pgA|<muKI0v(Oqa5Jkg}bdJZUdFYa^CZRbtV
zZr7Lig4h~z@ru1i&(Tc3e6*MtXx2(tA_YxgW-Enr67dz!upn(K2w;(MF@?1DqfOV>
zdp!<K6%m&dtFYf8O`dBG<$8V&2EOGZ7QboHz1P~pIJQj2!j&K~I(fT15WKP=UD5me
zIyKAWa0~6ZlZA<g075KN!gHv;%r^Dw6)w)E8}m9sB#Pql3d&rLPxLkL*6cVT^T;@M
z;yGZ>1+ckzkAS`^9Ra$%GR$+RMMivRsVIiK8W8N+lX9#}5SEZ{bO}ma#_S-}HHssH
z4oOaONF{(i|E1EU*wdA1FsK|os+YUcjcP95kcIMUQhVWW3=ZftEq9%1g-><+rH#a2
z_cSqU>hgo(Sii7gT2OS+1oe@6`7$p2dIYwf@D%z3)<R8Q8p5ggIsu^-{~oqUB06b;
ze_Z|f0Nr4KUIa3~%93q?JnW#en*6H2@~~25wmxZHOBVzCtrqaGWD^JetpV{@0d{P%
zYEBN<0B5~kej!#sTw)?PYlU~Jjr?fho-J4rci6q^4n+G4IP1bpYrSCk!scT67mLKR
zkcS0GT?xnhm4_Mg9~O+?BCv%YDveAK!hxc%d+8wfL)k!Y0cI$VD6?V`H26Y^S`~O3
zv?6z`qb4Bb=Qn@7YT|#QA0)Op)z+ku2r#~5Uc@qi-!BKh|6b~*F@F$o$OOnfb(K!A
zoMjr*f>7$m<rGC$VAtw=Wf53`WzYNtUhl`{Tmcg>`jp}73ZSrC1Kw9K&mUfzy;vlH
zh9ey4O>f<r94J=bRP8XlT~oHX;9K;n>Ida(+0-jkKsf{!Cip81i6AEjIg6uMjCuL1
z2&AXN8~H{-rLW*qaL#>iRo^g5{$GE8;oq<HPmG2=FHFH4vWfM0St<lS)_d%BmzevV
F`!95VJ3asa

literal 0
HcmV?d00001

diff --git a/docs/assets/design/fused_moe_modular_kernel/fused_moe_batched.png b/docs/assets/design/fused_moe_modular_kernel/fused_moe_batched.png
new file mode 100644
index 0000000000000000000000000000000000000000..8168155b9dbaf16528b16846c049054f22c6514f
GIT binary patch
literal 193655
zcmeEv2RznY|9?qYku4!y$jaVXSy|beqzjjkaoHm)Wv^_JQBpEOMlzyQqCpfHAraXt
zWcEK_T)A5J-Sgb{b3gZU|NgzC>pI`_J>PRa>-{;O&*yszRaceA$31|%W5*7BMFm;S
z9Xl|`ckI|Tg1rZ{T<0pjvt!4p^Dc7wE_NOk*0zWpOnfq%pO|>Lt&q+xOnkCTyu5G+
z2TpTqxTO=^&Y9C5;R2e#b+{$Meyf8D@{F~uEu4w>m;fg?xWu9YH?y{PK{{J8@kxX4
ziuNuDTktPv2EWy`z%PC9mxr5^M_{vulN%GC6c4X3CpRm&AdfJ!L4whsF5rg;G##^c
zLRcU@K;uTQ%_+DcoSdzZ_M5YWZsru)yx?pGw?%9=gPWn*nj)Rd5l&k@!5n#+_~e+l
zrN9;Fhfii}ec+2d+-~bub+AN^t^2LbH}6$a7jTi6Q8$p*(Du-EQ`9q%mIKTAdNkCS
zb{_Iha0e?Dr1{3`%{{he#LLIOIVp3`%@!dMk<A86C)9Jc9t$;SS)W0zWn<#J+?KA^
z<_PDl(KhdNK_YEktR22?G(+0kBh0p#v^65!$qDKHbvFy7?bhnH`Z$1ve>FPPp!FS1
zp;kR>a~Hs6NKAwUwx$QQD<G^btx!(}T-=<c9URqptHId{ZjN-{ynmw!ohzGkL{ZZT
zi3H=JF%A7Ps*|0E48j%?H547Uo&)6OU%&SYHJrXk`X3q2!_b1qR9;X|LElZt-dsyc
zU)w<#C0@=ho+yDum;+jGeMLICSRpNu_HbLd?N(_gq^rF-0%94ET<PsT$B{?}(8vQC
zPa|AhJhub~?&^XBO;#>;wp&-ha-2N%q5C-@{oD9zur=Jq7g<PDp}{x5L1GGh+hn2x
ze48FyByvW&I+<;W%nz3dR1nfa7r2upupb-Z^22^xiUHAJv)9H0ew*vtyq8BH?GRvT
z;7LvhTeyq08~P}~WNpd4<(GcYq<%#<K%Xtq`t7+u_VXJF{NXyc=-~#pb={gG6R)5x
zpp3M+wHx?o34N4RHvtA}vl&bP?Fy>(>n6AzV867zsWbEs%Q07Hgt-b*4pIht2UlRV
zxuwhiNdQ@p_Rhe}{&--BB52YG6x0ElAz(V())rxlgirts?|^W!2DIKVsb5`F-@YL2
zZe{I)&~kuph=x0`h2TasF|@GuK%lG}B(wm6Y-NzP$W1fNCkW@}wy=QOk@hZV?G_t<
zK)bVx6Ve8Oc7;!fn_ET(T=}V>0>p4ZpzJWz>$lnYL($s0U66YVY_^Oz5C4{NaNjnG
zJR)0dR_Nvs8Wke*-$xwzHYbF}_kW}n+OjUcYlXHL`;WImTl7G)LK?1cdl&078#dxQ
znfMxaepk7_R%ULv2#?UeqRhOqLW00=_=}Vo%xyy_x0IQW=LgCx^dn_Pxi)_vWfs_6
z2^v>Xw*G6l*nk_zwti*t1sz=>KWoG2ayW0eo{*Q!Bj5n+9x$w^>);a_NCB7M#N2_2
zR|a%dM#q-T@i)7ortvM7eFHhbKtmHi0}8mUwI$@`n*nzOLL6zxj9UYLRBG#jowd0+
z<UjooalRJYP2&8(hxrqsL5S~bY2rmm4mvdOY#Cj2XyE(XA%oy|R^T6r41c}F{}smr
z<$g#xIl&>P7G+}`kYIQ13~}byM?sBc=b;YW4MyAYV}!mjeFDN;LiqcB@#dK5zajtk
zoALPswqRz<wEmmp^8wRxY?BbbzUUuLhmTWe%S3L?ND$?@q3FBmAZ)DzvQEEBij7Hv
zZ9i&%@!K=q>b<dF_`ROLwyoF_V>Ht*Woyd|{(?2gT01+yUCgY$Q|Pbl<39`5`2hDI
z25xbhA0^x9h8V?tbVH2lhmwf@{*F<2b0KJ~MZxKR3e_C}-M)c!2;NbTHFZL{0H_wA
zCBFjq-|cfz_x~xrh{<N`@2jbEa&rEwKFNP>lBm`G+w_Cv8EvQb4{(>hK_GsV8-%W<
zLO*bqP@4Pq(Ng{`WI^N4U*IoonuZ_vw7(mdLph<pyCvq?as>bJdtwwte%sJWLr4lj
zuu!hT&JLy0zySWh7^5}mZH9rMOo)q<HOOXgvnX$FnpD3Ss2|z|v@!lv{N^!*^B{m5
z`Rg$@Zf-$2DG*HmOJZz%sNMCqo*KGk5Zv-t(eWF#;rjbn2A=POEhw@1in*J>^M|2O
zMTN%%xe@ebv3IpIu|<O2vI)Wigl(aX1BByta1Rp~5I?ow`u;l-_}@i#6sdlw9KSNP
ze;jD?qe9Cl)j(z6&{TtuhYuupHXLAd<v_vf-$*(BE3mnd+|mberGGIEhVZa<LAwon
zGXR4?84T#kcJ2)N0&(XzgP6Y$(Oby-`w+dAME>VP^cF?XAR1)0fJtNG1*pWW;tICA
z-{|`v+2j7rF;bZK>tLhc4-5g%k1!I|?{8!VzK>s^4F4YnL@AYj6%RKA>KGLI;pOK2
z;+}oC$^Ays{`a?I0x0Kc3t5C9#e!Cejx4Ca%HJGUwxT9~?RM-7u53U8RBHj@0~*L|
zLj-j3&%lT+)Bn2|0r3%H<v$-IwkU#z5hzoF=5BwT?);^Kh<`GY2#BEEMU?8JOaYqe
zi~PW&L<Q9UK2U<PSwG<ap8zEq{~t>>&=CLw>IX`-e93!!NiKc6OA$f^*3krt4;9Ay
zdt(Xz|B+NE3L<_NOZc{m|Nik<f}+R|YzZ2c{COck)eWsed7j@ERs1jnf;Nx;OM4$)
z5#DbR>jwsg2L(^)21Z~D#{Nc-wUy!cftdYa$Wl?^{TCtYH>!C4aXTKqZ(?a*D%sId
z3guCt3(?;%l7^bmU%TVkv^oDy0ty8pzmGQmS9UHaiu}L~pjr9tx(*qnovF1w0=2vO
z-5k_5PHm52h8lz090^`50fqWjU_fqdqytD#al50kR^UY-TTpff6_A5wxIL8r(nOfM
zg0ga`Wpn@Zm*NvQw=k#@2a(tBYQed=K^z0R0@+Tq8u2gH<3E+zKd}(-dwdc`nHn_H
z@<V+3QcnK&0Vq_==eH#jytS~Ec=}06D4G<1=d1kx*A0Nk{%|KHARzdSt0RI6>Y|Af
z?~hRG?<h)J-q%;6^o?JEur~+wej9Ixnc2dfovqD)HltMqe;tY0PRele@^7VLHp6Z_
zyuY2Af$l_mYvm87YyPxKQZ%ljBK7F52deFtUv&EI3d1k^c<|u|Rg;3>R5X7nq(fP!
z%{11(Q8Kyt;2%6v;M+%{EGO!bn=Zq@@yP#nl_uZ!`70E7{$WR74IsA@fMq##2%F*d
zHUQ1P#5FfZM-}6Lqi+BEi3@&#uQOp^gcMDcPyx*!sS--C{`<wADAE6)Gmn2f7KFwt
zl!^Nhul_2tw}n$%@o)ideokItk?lWQnv9a1|GX9ay&3%P8pR(KIDccJ{+1rWKThNU
z(IL8{M4m^4lXokb_Ma5<Z(b7l!BhWdMVo(%qF-6_O;G-7FSOb8hh2bUPDoIZ%8mSW
zPvDD#@Pq06py^-U;s5Wu7Q(2o@|ITcaHEn|Xn2L%kD=ogDjtVYo&UaX!T<9x^AC9a
zhow<!<290BtU-l>&VTcFOr*A7F!{wr{r|U5{+=)Eq5uxHuSC;iKE59mIiNKDZ=}jU
zs+ap?J`?I>m_Ms@=`W1yqw(**zjF{qZ8X0C#<xiD=kFB+QIh=SnJ7d;iR}Nb3-#yk
z4*qOO|A*}ZgayA2rtom1BpM}5f4%?<34Xl``2Lyyy8`^9dc?n%ss6_cFbWm^N&)`%
zng6o_EU;DW^=q40QxJ9DI35NSeg4&b1TXJT&Y9Ub&J1j8H%~NE123{|9%Kd{@YT6z
zs0V&mr~hS>``z4rH)W0*^Se9$@T$N`na(yF?~#H@fYog$=Vjo|h@Zc7@av2D#>V`S
zt+*2coCgFq-8iTa%B(wV9zz07^AnI}5|9Ng&{2b%X9jJ&4)t|X-X0066hJ;6?Uc8z
zroSQ;&x5L4-g1t<WJ=N8Mqbn|AAJY^E$scJJ9ue#Ge<K`IV~j)VGUEbo4u#Jvmxrt
z$gec<8#Zs}Mj=Q3Upv#o6mBML&IfM$nte>XeBj_xegSaqBzVgTdfs=161?2sl>q%p
z*73&)ZxPfHTqtiAwf{n67$3Lrm&07pdB*px+5PV`4G?UDX4@w|9!G!^bU_KtH>~?3
z8u}wfZ;Jw?vwsce1UY%XPMq+dGz*P+sLjb&jN5wE2nEsqeZKutv~2UFPC#Gis5G>L
zHx<7>coTHksGf2LN0oxlvZy>Q_>6*?U&kXSaLZRP`y01%qfSu+_iUZ<W{a=@uT7%9
zp~l)Q+u2%-s;iypMnV~B0cNPZ^}hJemhc6Zeh^9AX!_+zfXQr}Pxt$==FNWp+UQVK
z+a|LVz{zn4sG4o#u-xrb`QLm2=r8YYtwatEo$a(KP);E64?cskK09mZeSdCn0x&4p
z1AoDd)^@H?egH%-(Kw4b2>`16^8{Djk*?rSXmHTXhKb^KvxaWotP0#N8{9rpirWU^
zWWRCBH5_^o7in*X-~|6_TUkR--$W{2ZgYeM+|_n_LcE}+30(eaF`#?_(hGOsC4hfS
zq0gH$<v?0+m^*MlOM}K_1AjN~|4$0ae^v}tkuK2sHtfO1@!H^gV#tDMZ&Y7y*200y
zkmhZYe&cK-X!YO-X7C%lXb6r)w}tp+j&J~+0uA8v#(0}IeEotOw=+1jWW(%0X9I6C
z2pVVW*+5|6kSc5tT@~`yz|q0b!|fsI`DUmM#(q7_H$9+>&<N0wo3lZ909PHjkDw7+
z1n}WGHslU`c7zTd1vC0pX8vCQO<vULCupF_$NlAv2Xx%zMTP$Ulfb!AH}#Wa*ikiA
z+XvpGdwv^NQAg}=Uf8+;`fmp|PcbZaVC~tlgKmeStdzFL$?>$^soFYJ!6xd_ld8K(
zwWGDwj}m54WocDbWmQ$l5@yBk8Hpvd&XOgx-fbP7NVwYcxye1KDk-@q8Sa++DP!uf
zn%hvXbF*ht=UR|u-(q#lx|T|Z!dc9p{dk`{%7`nPp^rsy?x%hBVvxGM!Dc*)@#CMj
zGMFsm-c~%qq{MVPfBbW~XyD+_uF>tJg&l<t%Y@&*61@ZGCqIESi$A@F^NDaLt|+bH
zUV^}%E`eqI4%W{|pl~l3TfJlHg{cF}j~56n9R67RXX`(TAt8IVt|^%OybS$M)=vi}
zIQi3S#MAVE!V;zXsd0a{ejN1<$9+Gc4NmmgeMFaw=B_+?tNzn9o~?`W`VA=v*%6qw
zk7G};^iOzv6cT}5KPAt<Ap+dkVoQsXjZ_Fp>*%v4gdOX`!PT>;8Ef~mV~x;a+V@tl
zTQ(WT@(pu68JSBOe3d~l_^yr0>2n5_liFIv=VqCT&u{(3JZE1?9(t$|dzO|sg!sKq
zDA9yKD5ZGHLv_FDzEaj$g=K!$!AR}KR~dt`oUTru9H&VoQ`5X;t`|99peQ`_g(3cD
znH*-5{Ic2}XEXKObl>F`m3r|r$tU^s7%En^r;EcCYHNN1CX;*Z`|dJV?W581^NmZx
z@{5~GDpU+(#9^||#EfO}6;n**u6cNY$|&<LxlW`1joY7dB@1PvR-Le#vg3~KAn{n8
zs-2zgXYic<aBHG=w7HGSFHchFeaf=)QkUz}K;hF^cIRVDEGwe1qBe(?-rhY<%&Pfz
z4I{XY^Q_R3>|@)EnI!Dso(ei{@2C~qqcGfLSDlCni@%3oL9dA06Mu`ULXCmc`>fdb
z`uX5ur#Vi88tdS3Nhi(T#_7r|<x85pSt?A(EL9@9l0kk2x+0vTqcoDvs;X(?g)n&Y
zQv)(GBSxeutI-Dr)>Zhz*<`Um<9mmhn9<~d>)0vNVZ373a|)S$pDwghFf>*@h`U$z
zfnsnV5ylbZm=VN0e00ofZt{Gw`I?(StzF@uW{a>vpRZPN?IoTU*Bh6jkE#<NeX_i6
za-s1)!($!HUVK&qx^it+jibG)8TEypKpE#}9jCXY?X5V59d1S<`=xh^81s^X#M3$$
zNyMp%*GUExV@|)xuBkj$!(UJ)Igv0D-u~of)#Cf}wo?jG&ypE+BR<sd%SHjaa4#H_
z^JGOqrsajk+joNpWfFM1FKaqBF1YRClAwQAnOPJr-0n@ht$v^Q{bzHc6wcMQD!Obv
zGJD`)%#6`X#h}GIYCf)!-tZI3Lbh?%O0TZGU!l7+sGS)ViK#&6fi32<oUwYxtJb!+
zLhV{jc7jFQ(;LoHy|e5)NZowPPEa4-eoDZ;oyPQn!GqBXqkguGZ^JRy<35|$F4M9<
zTz)tDss3VAngsKMY!2?+D)Oe4n(CxRNd_D5^M~Z6F#M%XjisM2ws=awx~6z=M8CGU
zPqtIDV|V@fz~OBp@HP;;NO~Nvf8aTh>{&8X2JF%d35yS}mUzcj<~3`qcCub%4$f#g
z_W-B@_CAaof>joC1WQZP$W|%Es$3?#pvPv_UCe=LdXB|>+lGBw4I`g$EMtt^_Nql-
z$0;Mn*F#>T&3LW8)8>kVk_GXf1{p$WnQz+&&9V{G`Cn#m(9}sZzKf`;PCgVEv7DK#
z)|^q-l${iOo?%-7L+J0}-XeOCtmerB2468<#h9_z_6y8s*AmVc)U#~66$8xwS$0hF
z!cKGTc$)6J6<Lbu4-<@Fs|Rqau;rVuV!yz{*(st^f%<($BvBtc!B|g?;fJ_=W(+BX
zY;s=R)K=oPJT5N`iPl?ke~w(bM?rR8<}T$Hu|o0|<&%*i;jH7ZbG#W++v+Bv3Ml8}
z?Jq2Leh2$QCk88wyY<iGdxE1BpW{Aa@_ZM7DFbyrNF?m?G2oegsZyn24rkdD@`W20
zrYl41IIA0qP0)x(0uhfHk8JRPd0+=!-;U22>l0Or*XGZL&JK?FX<j|S<2m1^haCh#
z*~yT5OW}IvO^Y_~<K7rtlP^i(dX*%Eyk0bSNM%88dy2jaV2W!FE$18>@(HN|!tIl1
zW8(Z2F8Px%%^8hc)4o!TxuL)(8P2j5_i-4p-!hX;DqJp0P>6wf-bAX2W){uGQ%|As
zV+X{KoyE~RCGd4a@TYeZn4NK->`79(xvWH-ux?mx+W0o;erMbcoB%5S-Fd#z7h5u)
zT(p1wV1ly1?KpgxjrFAb_Vjitf$8mXf$y?41k*c(H#+1p>cey5-J{S44M&73W}E<d
z7ID1iCx{#4=Ius0E*Gx|dCd^uigMMN>!DeJ3z+)!?X-FvpX#6C-nw5tbf%t#+i_?W
zk)stF<9fdePuQ6_L>MFF1C6*{L1~Vbh6H<}<hz5mp2UGY$}-JZV76NbdKO6tiHJT{
z6wC#%gDO?jeED4&uiyNI*4w@U<QK7<IPMw5-BYG_7<#jtQsTW)vS@DA;FHyMWy)P(
ztou#DXyW$$SitnP+P_F@t<k_&ENCh)gXroX!Z<zG-)G)-eWKXh8>tw}?JFzM<v|{3
z9KbNVOYsXd-*X2Vn?IGmTmD@t5Vt*jlfv`EyO;7^C3CLg9yf2g$oXl9Lp>mhgw&}A
zYF>lQcoc~Au+s1k$KVt#50}KyB#j=DG{~(IW&+H-UoN5~G(s=JYZ<#rYmQgb=)5tC
z+^OEM`B-jyTgvtvjew}|I!<ZFIXD{8jUb|zx1Yc2cpPx?yp84Fv?C_Ik}FBtTt@tN
z4mEWGQhYk>?@v+_lm6UBv43BQe8+yEM0*8jWw%A3Mjlckq>6CnFiw%Q?eut<S@!36
zNSvl_<1_iDcHHN4Y>EeFIva;t?2NRaZ8yhQHhyT4qh^QP{?ig6+m4<01wfYN-V_Jf
zW|C8Z9dA3y<|_%};v3@>?~93?vusUe1%|?e@Zw$bvOy2#^QAY9c%<u<;S}^2mk7~_
zBMuSA%chP<Tn$>5rQYH0J2EvXwMtzAHx7!$4IKw8Sq-|^MkRD@@k36r<B)@5oS5RB
z3zzCR-w@61L?h58h(JQG1FzyLZj2;<`NczK+V`=Gya?=cMd$0FtFPRt_w{zf%ohA3
zeb-O^!XRey`AgP?$5Ts=cfBekrzROX3{5BY-h}94)>u(!vCpJ1kf4)yn*8fHPs|iM
zqb<xBT3D6MgBWoJi0ikg)UDpF<xeR!9Zo)HR+q{IsMAOM$#?BSEX>h+q__0g?+@Z%
zOC>lZ!6I}0SzJ`4&F*z`5gG8nT4sl}DWg{RH~gJ<Pn~-hO2+VJNCr+}Z`H|z(la0#
zf;)9X<|pR+KiC2?2~N99=dF`4ovgkyJA>7U;a8kWj}v|LDB;CxZ1v>Lm$XhNa!}jP
zT`9s7DjmOw#`7eI=aXFy=FLDmY~M}=t?WCfwBV#-uJD?inElwtdl}r83K)!pgqQ?t
zZqjbFpLEEhCMbVtq@}#_hS}#;S-qCmcA@4&^g(b{Ia#tYI7>NA?8J~9oPtxo6UY_H
zfPBBI`Gxb0UPBSFYdsPEk_JuDz!A8N?Oi)5ub6Qpx%6|j=ACCiwN4ZjYoW2q4q{dJ
z*n^f%XjP}C0=nI&6w}#mB5bGJ{KYkzqQEG0IH_V@Cv}m%oa~1d_a!Z9bOhq*x+$S)
zmI|a<UOvJ{r~s+)$J2`7D-uhm*V(K!OH5Cx#g_o<gRPHYw-OmMF;}lp-Epg0LLOJN
zkO2{lCd4a{5HG$CVE~|Xv~QPBeRJ%}h4lp3k_7h094BAUbEnd&C+^DE;O~hw51Vin
zHb>VrjVW{j_WhhFp~rf%q;H*@JvK^;A_^VOYLQQ2^}=cMk=K<4uZQK~!>(E5fLtg8
z=zR%d^}@1)iy)v5RflN*P+FDdWnC=FmYX`L<yc`@Hk-sV*$6#N$vIywq4wQ!x;Kp0
z>)@*;z!&Ja*NcM{l^$<@HCuTlNC3ktGnEFCIueghA8xr&@=8VJrwq+|YIb<iu!=H%
z@kw^}9Rdneb60{oEfTWC3#$6X>&yl<p5?C&n}`)LKj{QM38t9;$*b2J>TZcM=@dUa
zna2K|v*ZIYdBx@-QL(h`<tcx-Npljr(4@jG3{MQ7+*DdHk+UQh`MqwH7V2rt4nKL<
zmJB9B#w@xG#yk6%p^4NUqoxPbSUzagYExPrBN#dOES2X)_LC)O8p8f^HiC-1w#iwl
zOPMFrSihUb*Ru9@Dqxo5jB=yeESS|>E&-rCJJnA0wD$lc*t?YJ=vvs6P}h_v+)4Rc
z{Eca@QkV#1V)TraMe+AC&gz|&v$D%mp&yZ2&3SWKMRRPbZfKjAny<xmHEmj>Ot5um
zE;p9$Wa_<)!y9V+0@IB~7$#)@Cc333#nUyN)a~S>#-=YaqHwPqfJi>FaG$&-WaRv7
zNs5z@FrO~L*!?Y6x9pSu*+UM~9~?z|NAr)<yy*SysrW17s4Bk<oyk%t!;~eyjLA!0
z$ZY0So}>qpCwM{WWC-N{zVzFRv96+vYp}k^@u4<Wt?#(BEiU&)?#P?3w!6gk-~F&L
z9BB@N3Bd>I%OwM)U=`;=@6GV}J!v;yL9Aaqcrs1!J8nTnd}XyOew;}&cdFzyt)zO3
zvAmq_BFPdWd1C%pxLmQ0x7+jD3$ng_EXBS>4UHKmjHcR0mm>mYYwTEx%~EU@F~@X(
z{!p{4!6~T6o*Bd*<tDsbgcqbUM7`~7&`|?Aj?XzLv-9X>W;d9+jz2r!4NI0FYt+<b
z3_GH06Bg?d?WU3xUTvUm4!7xE!)Xm8oho?Pi-eso4!V14Ml<xIBs=jES<JvCxq3Fo
z$3q)F%i~G#q$pPaT6FtCWM%gQHs`2qTSFKHDQ88|Sc>*w6Iu5Pq3y2b(A8Ud<{emq
z@T5c83gz+5lgJ9y!HG;*k5>2_G1}$Kgi!{~;_`+ZI~daY#<JqfxIzD&H@@8hz*J1M
zRtQWVC3E{M?!%_`c}4A{ryS)OL1^PacN9)R`t+INwy$>d6SOsW9Y9ks>MYHu${VwA
z%dU>IM_$k~c#r2|r+y6c1-Uc1R#im@mMcG(H?T2Hq!f}H6lBh|<KvDMy!FWdo`TuL
zUHb831mjFGFuB-;p5vYqT$FuJnDET|(rH?~8ZIV@5lf^4!<`9ME=OuD5xi~NFyet7
zspK3#a9Dk~U5C(`?9rj|;&>cSuFM(TT<tD9E$P0)l(XTb-KUi+diV{^Z&Xe#Fp=F~
zJbBihm+N>qnd#jfE&D2`gNmqHYJ+7pbu2_(^eQGKT0!*9oL+b@aR?NpA;P|N0rtg+
z`5$Ej3Q~Yq6(uT{ne(A?XEIpugU7Iv(XL5(cvzpdQa=051tFN{XlH@8ffL(-h6VRj
zpT*Z)1s@5$UjtNf$oKTXtD<^c*qC%RsNu?*7gNtA%4_haZn6D0%irfH8KjAt<=UR8
zl5VX|SM5E+Tq3KBbzC>^<<WJYRecN9WI7|En@JaMnJOn3W+>l>F%7m{4Q{<+EzYV_
zQ+zmG^y#Nc!=50`Oj{-?-0{>Alg6R>JVwQ0(pVNp;tKaEPKNPwW>n6JjHiI`4je-d
zAx>We#5{5=J6!c*bCL2&jY4)0%&V3x*`Rxd(yf5=VS^DY$&aE_q{mG@{~^p{$Ra_r
z>c-8yyO_<cKrh5>XwvH@B@vH}gl@p&?90ZU-oIOA-kWb-Z2o{?Y5s)-<(s@bq6GKJ
z3w%MB*|qbtTqZ=dud7hoHB<(1bTyvG+Gp-jJIU+a>eBd8m7G;;+D9?r(9O~hFI0Tr
z+)-`kG=Z^~uOAlmeZzPaYcZiRGUe%j-)Dj42BRYGK;vm!1n_mg4M~bYR{jc}XeyWc
z=j#gII<oHuhG1$LArTD>PBS-#>j+|8Ly9h~RWuY(HmCPZMKt7T$dKQf_D1X)Qw6}E
zKfl*bu%C5Ym0FaIV^k-hQ~9-G>maMP%b1^LnYom8TSYLn`Yb16v{W<a42_wGxM-=m
zDOaM)h`4^3PrOd$N6N#2;YotMRPE)L9%{B0`V)H)x;<c4jmsC2jP{%Ci*pK`Z75xO
z8fkU+&dfXIIKi|CuVuHgWRG+>g_u_|AJfib--IYVpbZ1fRrXHhtoV`YW%)(9rW-CD
z_zyC!-4!&nW#@01b6uPkI(a&ev76JPVMRjAvePGXUF8Cd{D$n)g$M|9o9(gH;N`9Z
zXDDCXec;S(<UYw-a$Pl^KzvzwvYM^gxNOSec#LFoYV5I_M-=8;12d|!kM^6k-WuK^
z)A1Z^#Wk~(%uZ<WtC1&*dBv34XzCqV_`tk`mxFX`Bli`Ri8?5u0A_FW!`VaIesN2R
zt~8O{5Jtb(Gh{2n-LQ`xCm=Qzl*}%&KhWx!=)NmcWA##YsIV8tW7-k1M62|KzKbBI
zIHT<bJAzGz?_`FsQ}mk(ALTB$2?m7Z($VF}{al^SIn#!11t)75V@yI1<B%}%1oY%H
z*)SP|#n*coc0|%js7r93&}P%ViKv!1cI|O*<-AZ}S}jsNDg*Nk-92nkAz*p8{YX0h
zXvnwrc6*35+V`PR2aYR0F+8QOUuQfDTn|p3&v$!`>LwR%8fg|<=MmBoA5uD;5bkFy
z`&{RSrq;qlr*&41>Y9DdVNE%Prt~B9Jy;Rkvc`{x=HEY9-5tR<-o>BKsu+7HNzmBq
z>Aa2J-Vwb#Uo8f?_T(bbPjmK5@Cl_69)|!i-;WAM%B<+7ES{^vhii@E7?^jKgQ#^y
zWQ5Qc7ZH500?79C?%RD32GHs@ikf{)EHbJ+-RZo`uzVm|L(KHlzU~SqtNBZ1JZx3H
zciePzT+<G-CmoLVm?7Ce+<EPITD^joxX<jJk5+vr3XCgk-<*F(MChfa&4UwMXWwKR
zn%z`5)|wkbbU!#qyZeTQNnHAQc~2}-FA$oa_JqRhwX)Q7XkN`BXoK?Hwf}}RF|?iR
zBI+e{gA;j8H%mH=)S1MxC=!Q~iAFgt#h4K64`kG2QKVo}(x&XK4X#e-Jg;3~(CqQK
zF^<`(e>H<x{LEfglh&ex@Y>;bOU|*}j;5jOpGi+7*$>J=4z$$SI!?O8n7C~#<{ttH
z6H#-jno9-*4b9Iorb4jc!y(Oy_%`NGKE-I>GPJAk8jd=gCMjR<BQD>{X8m}>nMw!t
za4z7$6nese(4GBZy((<>6KxEZJN5<4YGpc&BQZAFG>eV6w|Ws1V|VW!hA;NL%r3Tk
ztmb6(`D7>=bJUUANfJ+nwM#Wo;RC?soQ6Xrsurd`fJP$To$hQ8rQ7EvIEyb|Q;5KX
zB|MgpEb$VOJ;J=VkJA0K+~7ymm<koyLt;dsqz2~d?a^}7qs=6Z20DY;jj*{C5n8qT
z?NP)2qV?V|C)1}TUU9VytpgzqN!R3jjo<f(TyLU}mB?sFbZlDTXp_$}N5+Ud%UZM(
ztSn6>kc6)0Hr#&%i+q*wDMv2Ihj76ss(|Hk`|7y8j*4?(L$vbgy?tSaN8TEkL`^Eq
zCMu{sa&wU?yWO2pdkz5R<1X4WXAJ6^XgeGt01wvXK%%C$J^U)u7lhoILH|}YuwxIN
ztR{kFByUg&&Sb+DMQ9+TFxdXc@};MXiTUuy>9m7cot?(8@UF0kYCEs?eGlXk-Fz2L
zR3GHH-4JCB>l1X5I4FLhZB2IaB)(^luRx!uUi#gjb-HJ_Rh<sv42?WfUMQaLnI#(*
z|0qHqtB)8GS9cyG6dGqP>!KqTs5H%zrDJP9c7b_zAfRZ<+2zp(FW9Ao%f`Jbky?>t
z?<N}#G%q{3Q7q`qy36gkA=zHP#ZZlorDa@4Z?Lig;>$(O38aN++XiB6se5g>w?Yc2
z@*lp5je=b|CQ0IU_oXCPAt6WGpjV8@yj&B@t?RYvSF&0jRKeZ{hX6NkFvAGeoy{^L
z)FGIK`#`qX@A^7%<DtPC8(ZbOkJbcgh!sBtHO%t?Ka8bVPtGGbutz{A;MU^j3&~G3
z3@~T0RPd?}BV?cC3_n!PdH6;kX8!bE4>i21Nw($jp`{l>^3~cfm83lE2f8qkSHf{Q
zAyqecY-2~34PrXyZ8dz;7CU5V2Mv#t*?DM(-b{8)cPw)9$9~XZ`mns`f|Znwa+026
zr5%jfkGL_que~q5^LF07?Ax(f`<wfQUsc!W4un~goDGp!T@t+%5fOKMuobD)oAaTX
zCrq+F#MQ3EKn~`cv)s$3sf*JgG}}=iS*goo&)n*8#m;#0Ge@yz;}O|O8Uwp-tfo7@
z*L$7onQeB*NGsij<xbbrDwXD%rbYXC*vpfNh^WXNlTJ*b0toebA17|rTSH`1A|^)i
zjzD9&Le62d?M}b!y)@iggnZFD3OTYa3FO8h9hE*yojd3aGxI)hDX^y<Nm=meemxK#
zHfYBfdn-EZV+zac?d1cpds?~Njg1D|9hok`Sm9V#f=jhNw#bQdU5nL^mi>gRdu^xT
zZP<|}dU;T7!P4yfsWal`D%WW=-#O^el9736Vz&5IS#&HP%X-VgR5R@@i)&AfAknPs
z(QxjP5McZiKNadC%E)^yvmrR6+q|t}4W_4^;^4-F<7dn|tK%6H`nhwmdnq#3fh`yR
z9PaGQs?_5X7C9AHB_nJ)DFl<P?8<aZMDjvEYCN{4ZzXWDljiBxxf_+^qQ^uMv%Zq3
zG!V$LVi=kI#3M#8GsL?Pw$G6(U?6237Mau``(}veG);Hj(gp3D8?@}!H_|VS+fjAP
zo>nQ&tvK~OTFFRD@gARO{E0EsZmj6DW5Gv;4YiGjBbvFHjw4sCNs=p0-zYUE)9GM{
zwBsD0?bh_QWItGWOWIa=ZO3%KJ?pr)hodk*K*w_6fL8>DQ*4KQci@~oD@tY>o}=AR
zyh|rukJ{+fC2}|2*pji(6Jnz$I&`#i@^DV-)!m5XFx(qsghk$&SxuMpk<vy$d$6Zl
zPWDJn$?UN=+N_02xju!Agt#Ohby{33;K8MDq?N?%wCZqSr%S`$oj?3kr{~Z(;hOHT
zP&NftF%k`gOnmhDtSZ<0hsM)$OQzG;S8N6K;z_A4G7XOB*AZAY!M&{%B;_!vrbIbw
zQMCY*{zWf~iKhCWiOa^)PqZgD9M8_&BcmKjlaop0*Oz5}CDxD7Vk)snoT|kqMCp9|
ziJsSo-j6nyiRXn7yB=X8UwnI)9&oR2ybj(?=|l6%({aC0mNgF(-e+Bnpy3791*JW@
zn%?B>1tD2{*$)Y->`B}R%bM;S(lBiy%7~#2-K*1FJ6mNZPOnJB6KZ*RUQ3So#T52@
ziaqe2y(J4-#?K+2i<l8(YXiXWuPjcb*anOM1_&@<b$B_~Crmr{WCf;Ojn}8q$YvNP
z6lvGR5s|gIdq5|v>9|qu2NE)s`R+4Dq&*%?PYJCNcFqSvUTNm!z0AGAuSg~mEU10W
z>EOwk!*vA1x%3GBz*xl71ee_GR)nLy(MyCNoLTAe`<(2o6ykH0QltIivcpmx6|uxN
zWNwCMuc#>$YqLEmG&yvs$Vl|4xtv<}OTv~ttO6Cb3UJFI;*_EX9rtM`B-?~wge}qO
z-bUA0&1#}>;b1qxhgX%t=0xZNo4;K}?5?1AvPC8<p4c}e!fU?FvKdFELoReE)rOzR
zEp6t$b6B2$j8ugDsBLwIc4Z0YsTNf^0#oG0=38cg9mBO;7;9v>RYy(WO&2iF1q1sq
zoiAXC7QH*k0D;e_QtqIZby_=i)r){0Z8Gs-+-pmh!nCz4v!pthbYo6f>YTAn&0@pX
zO?wcet)lHHcjQp!i|8kziK=p@{kDR#eXcr0CUZs|Vd9skcfls3yK=Qm>Bk9K^^EQY
zNjTV3X&xq0+`}5|+)elXqdtA?p4^gN@?6|og)IIba*AZkQ=+g-aYeIMGgCMZXzByc
z=}AFIk@G_)e)#KSa_#hi6mzfeEuZ&`)gpDmZYA%_#LCN4ctES2!k+u&NLbO!;r_Ly
z5+$alocipht_Pjx&aZ2ZG>w)$VpAe&^YzMOx$`1`21b$Q_&h6<fo4W3o*{P;hCwuN
zbiT8+!ZxSowYCclZrJM0Rs?e{wsa5h3vK#leP0p<ru7{r!veG-+ykm7?1|7EUf@~Y
z(V{0e-EEm^&|KuqIPUUf#M*U;GK?}_?-Ap>L*+Q4q!*611i$aGn<aql(>W1yAbjq%
z`l}t*u_1M5F1H?C$>D2TX6M1|4=m|%n4~EX$bKkoPOoGeapWpzSZ-LD^St_gX*|)a
zZ~}GW4~vnv0p)Ns1jsa8qvfMjg)dCh#A*-iF~APZx+GA?$qx@!Du|A_Gpwbfy>qyq
zJ`6wSPSl;Z&&0EmR9Ol#30{kP9U6oUUBDn2a_$+;E#wKVq0~VbxQe%W-lw~C#DvQX
z9+Pe@=b)lFFLB2#YA8e~i;Us!elyCPhm`BR@@aFy&KN>0<O=@2@{zkwB)XPeqonPF
z%|<HmO=j0$1?#UbONq^k`gEMln&hIFj?zS;qaZxk(+VCu{PwVf?7mUpE!g|ntgPXK
z{BvzUDhSx#`wSJ7u%kQXcT^x};lutG`;a@*{L}$TQe<hm20B8niAr4_IXA6#Y6=&!
z5e4u)cd#%Fdj`v3wGR5Iryz{YAZskOzf4_6(UImrX1Th~Lg^7R{6IFkv(eH^pJ<J=
zsoAbH>BM-EXw&wwU-@|c2-QC83>KHOF8XhT24lmfwFd58bUbu$xkW`;TzsGWbbgys
zT%IXG$PSt1k>Fem&Hz$#iUWiDaZUEB&%KmmcFC}GXSco%YjRk~;Nw)-XP+~4VYGll
zC*ZPupmr%$Q!G_BBXv{JOBITp{`n6poG;R9YSW849;6OaCe=Bi?n54dWG81ic!dFj
z=t?l(*pNWEq7J_-&X76=32S+XlI%?M9adYLh;>K%gITOYQFVk0X0G*RF=6>G^;oP;
zEWGE$&q~t?BMDnVmc8pb=e04fDP3xhBs1+<Z(mrUVi{lc#SwwBeV<7{yh3-|)89$#
z5DsCF>th!6v)F>9oDSCja85)Wac+7ueFRpkE<i^7^bKK55Z^2H(DpacxZxOhmTncZ
z=8s>$tq4sZtiBv}R6k?Pv#_}j2OfO7Fxt>cC_%lXsgH=FI>__P5kYJ@($yj-=WI#Y
z{{B=c8X%k3KNI-2mpv>I>kk`dJo7*oQ&vbjPp<5Qal@{{66t&)_v@cO%E5;vZ63vd
z2w8wCs1Onj*bY`H+~dHQ)DCH*xq1<kE%u%FEwfxUwo7Ihh8^?=BA*Lj5GL$O=#af;
zq&?LJ)^9vOVWp?hvAZkGNrI8E8U#1axlf3o=cPiR<v-td&(kbP=XAaRM$=U(%nvFE
z?Z6Hby&8S~Q)e?Bk8sG2&{q;z4kbgLRkv!<^Q1aX;n!&;hd>w$1oJ}jo`^to0GmHY
zg^3-q0qhf;H;y}c1KI%#3t<NuI~0gos$lplfXx=6UV<}PD9nKovb7G>d#Vokt3z2x
z2YW)`yPa@vO2Nd4fqb_DwqdlKWxoRCyY&n2byLB(vavaQLr3a%zhW{~00$Gc6PWcA
zI!&S56srq8%S>kaz?x+|5i@n8e-Ab2U$u}bf`fSg>Tjgcgcc60i`nS!C_J8mNxj*B
z|L^qA2ID&_fd1(l$-OF86rHj8AsD`$nYs@}9>7utC?O<&XS~Y>+HKpgtQ9ceiq;zO
z5`fg&JsilW;-eeYBq*r<^oB>jG88l9B3f7W<p<gBlRKbz<~<w0)9RpD8Z-vHg%Ys$
zOa_i#gc1{VY-a?qa7CN%%y5EH`xrn?ACzC+?tfMdEK)z?1UbPIX(&_br3_-ji_6Xn
zjD!kca13h!5sK~kV9P`KpD2gUve!^X{I&WrLLS(!3R`M^kiy&rmivZ$JE(jVgA)qv
zCUMVXCP85*lUa=F!N*!1cL#UB9RLCE>SJKzXT#D-v`rpNC^d;cK93*Y3B1S4`B|28
zU{4)EH3dXwS{#gDQ+|jVt^X%RC{oaf=)Zgz61F?#m_XPHB`wu3{6YC2XS8md^Y(7s
z-vOdwKX#I_7nD3YgMZD$O%^_!^t4_I_@lxgH2v5Pt;U3r5DG)!lMr!D?}YM2yTYeq
zgO9=&V=2yFU<T6x(j!`U#ay28tsmz4%9Lb<&&utmyDpHiCsxlsVdpfNQl^mbm_B+?
z!rN*u>*-fFKq%s)dT{XAR8JMfyA-RIOvPsxc`~Y|mgmQs?<R7?>nKvz*H#waXZWo<
zR$RE_IPfH|ux502UVo*}@3XOLsxbZI3$#K*DH1+sJm;PZz3*}Fa}`!g5?w68`@FVj
za^ByblGmJj0h<b>uN0t&4o&}7M2FS;F$k&Ecz23|OAkR=;$~gdCu)Ba-zB+Xi|3=W
zcLb5Whj-(VyDXhCsVss&2;#Nt;x|(uW7CdiQceswQcW)t)Iee4P4q71Ndo)RbGz}R
ztKBBL1Qb(+XPkX{DxBr%gvIUbx=Ldl2kI-lSJWie7?)<>JC7`DWveQdSUfj>5QI-q
zs20l@>b>+nN%NMeT+;GfQaI;{J9LBzRu|3USu`SQ&toyB*#{HQ#4h%FHJe<2<QRK2
z07@2O%RI-ry902;XQ#(bC-y+&vxgb80jq%OzrkcAd<Y~pm8*sUP9f@MTj4sEC>uuF
z=sS?Blb7W((%S4gm{({2^16!V9S3-lo6plk4#euG8BXJ|F$(%HRlFk+u4C;tV<r83
zaYao`grl0XA=y^rz`_~`0Ak`~vsLDai3^zv1I4EEm|}Eq3yPeX!=9e+4;bU<4JmrC
zBS!jkRQ+gYN%r3T+;6YpRi%l#E2WEhg8znXpdR0IWup8&pxeTj<~1cIxp49XkC}Ik
zlRZ_4m4ylVW&Lvdw3(V(p}Zo#srSyqO^x!jKIR$k_DQbNQSw=QTx4}#Wqj)j$sowf
za#KTzY2l;uC7i6|tG4%rPXPVQ1{Am>qCgjdqaWC_*yqOuo`$CfQ&6n-n8{2P_tJ&Q
zM;!(sv7}cyT2y}S?dcLeY?e)_L|GZ{yan_$Rbtk$r4m@R4h`4}A#tiYODvUd8kUXA
ztbH1|uzU7#SE^ixaixo*@5+P_Ll@rcvrYr|p?G}-0flQ6cCMqZh$2jco(%wLjol9d
zt|kvw?x+f7MM-Pchmt%V8JMSZux`!5akdjPP+mKgKQi$60uvAv`ucNodu~Y<HAUwP
zVJrB+t?HU|3WTSKY=X-IUp%N3K9g9ou9_jiap9nN>}wN0V~2y8B|&p|Giy6<rLbtg
z23B@ik`~9@dkSUvxV(0M^45dc#vYHc0;<pMoiqWN`g5TGK)7!T4l7}hW29q?Hn1wN
z_0W+$v%HhMyZxU3u4_i+_6s&3rO9&Bs66qi{Dq+Xij^)S5p(b8+MeOVuB&B8@Vb0>
zL0DKj*Cfj8F@1{F==Qj?%@Jg;nE!aIhPi^-#H*Y*`lFzu{fVhbO1J%>i043)?<Icg
z*co@yTQA-{^2YBi!L16!qrlZX{=CKR4neEs(6eOC8|qgGoo8+rmEf{RP{=V;ljRm2
z!#`);maBu``LWBUu=8=~jT&U_TKj&<q3}c@<jJMwu|j-}(Gf#n2U<=A>XzFZ=NMPr
z{y5pw9kM?5rlQ10k`7h`bnl!`K2<x_VdVrirpN;VyeI3u9TqNE6k*CTX5u<V(K5cH
zLRQ141^`6`9v0bupf%%`#G2|i=or|aXYdAo;VhmZAb7;*cdN2<b5$-QFrSqL{QD#$
z?Z7f6#W2VRoeJgxdUZ(Q<?Sa=hB*iE7ztG&%bR+y*r*VKlEQZ%yPJT|<u)Sh00a{c
zLNpv=Y*UE-r0LvGnPi^=gYrz32Z&#>-)G+e8wGM~A^ucK-OES))vd3-kOG<-X`@b0
z#+t6HXgay01x_efiBBbXoKCv#^dzn*Dc#wQy6WCDV;nLrSETlj@W0xFf1o4W@U3`B
z(O6g6m~(5(k$N1GLki*K9OGPGMjZyPmoFh}V$RHuipaXokLp}yId4y>eD};eE<MZ4
z!GHvE*Y>D|$sYC!r}wi~9P*I|m}ri;a8K?=j`OThbBv!53Ewx1SGU*ZD&nSVYP40z
zq>HW#hDLS=*A)R$hGa><8so0Ed;_hTmC352+gIpI=vQCRX@`@sAG(-SvWL9NZ6fy5
z%s_Cb5EtD}Bmif9BniT=Ukc#igY&tc=M~|@em3<)5C9zl0F)+$A$9y7o1^+pL7(N1
z1feeD9W5a#v)Ua#)AzA3LeDP%q@ikYQE&{%dZ}R<Et|w;B3N*X+~=t5{wB&xe3q&q
zGc=-Z&lJ>TO<1+A@0~fAKKGc`PmH2%D6~lCN>IyHd8#!A0g9Y_v&T^hx9z$Q6w>ED
zqo%(R*wZZe+4h;9kV3&|Ti(++W;JPSiRuftU^hvh8-l=vh)uNqk=Uo50)#H}W3S6=
zeLhXY8&gF}UP|28mJgt>w3&QU(TpSN!#5P{%KcPSwu}A%NsQ%W!8L;p1FRc?X##gC
zh#UwFLiW5`QOqW0s3gk7TN~Ze81zaTyZ4e1@(8J$`&6$$4ec|H7!QJWQve-K>#Lq#
zet?OwN(pR~CZfb{3If6VPgJ^Chp^ID?u%&8H(&sgR9YJxqNN}!X4zScSa?nJ+)CBJ
zt@5n*dCY3a<+!^yz9$e5f^mv`-`Ai5qmP3U>o}-04cHM#Q@#fWlV_)UnCe?PUj=rx
zfHV<T(mOGk9fc;6=fwiZF@x<T*FRmQe~@`B^0Xg*ta*I|WzcldF+ciQJfc(W{Hi_&
zX<<op5z{sHK(=LNq)uG9dGfBIQalU2POdgl16|0m5Sa7e6>_&5bm2<HR)pfhyHvs-
z$pjN9-nQ)=A2{7xT|q}f`kZZCy8HSNzlP;&e<9dG*Ois2+Q@iyOZ-`EVjO(d2Kgvz
z$Nli&j~WWcvbgl}^;>-RP0Gl^vMyKISd+QE4d*vcT?*#g=QxzUwD(Q|yWRoTA}fo~
zu)_Hx0d7G(Ia=A&xT0gi(Gn{o*%=3#cS_3YozB6z65;zIzpm-TTMV1aixLHaR`scb
zp<zDxmqglD=b9yxfDG+Nw%yQRcz7h}HrJ<vI3973#}s#H1(yrp5Y<WZwLBAC(WycT
z-<H)U?1dh+W(!cQ7aOGRgpj%uF^#KR)fsAIw5UZJggiWl9Z>pWVz4o8cIXnatM*ve
z<%%$7%!H`+m#n!KJQf_SI}FZ;@c0m<G1n1b5NhthpWWxQJ8%bG0Ntd6eJ68<w<&E2
zxg1OTWrk+D)6d=!Qe0wq6nzFSG<YbgNG5)Nmbuk)w&DVlT235dMu#(aY!9T>uUOaX
z&YV^mFs}8z8`GXcWqv3i3!)h!x>BJ`P>7UNN`?qm7O}oGoN?~PeIB#=fRK;}t|8TP
zvK`j+;lp&xq{UrtqAkK*jC%OBN{XJwFa(cxmpA+3H#xp)-N!YG-7+)qm}*!wg-Rb5
zDdxAnI@=#iJA2;ghG;4>NC=WSWZifl73Ahqk*?M5QwuP@I{N<o1-{!R)mN@5#pz10
z8fxagwy~uSKi7N|f!h(a{DNaZH2Iorg_36R8%s{9s-;Ez9L>zLgN=IXAV)dXDn1lW
zFy6yqY#1muMLA_gJ&@h(S|#aj&!We2A!7aX_3GTP>jL?@s=Yp3RT1gD?6MR+U4jW2
z?siFy&g9CKdyyg&y`5qX58fnDF~Kt}9~ZwZJAjY`sc=JuTfA3jHQqIOe=O@Rw~ZI-
z^;Xu*IzI7n;d&Wc<0Cx3#!Ocuot8V6J9kIfd#rwnu{Eqx!X;;gR~uh{yTWVM!loGW
zBvEQWwaR<s+^X-O-qIZhjm}#4q`f0;a^(3-Eoq$RW)1VL`wDLFWhwo@W172`qB1Ik
zWT5Ol)9rE7c;JZBVA8Gy8)7C!HaX>lq|>ek6iPp|$T+Obzwov0%d!u7?XVU+x^hP&
zkD%FjitXb_Tzvycg7wQAh!u6@6_90NDP-{9!FY<yZKAte=;7I`dmPWcBTkp?pp@L-
zUB+h~R<b8_PfkGC#gHL(&8!gRheTyTqiI8=stTl8H&QMcWO~5H=~`nCshn=0!zOJw
zz>!%WNyThXZ0@Pr(NpEl^>E2oA^5OKjdzG+<bIwymar>S4~c!xTa&wmaLbsdmyjE;
z(c-NKwCA%XVeLB@pKnld<nHdA%Z3gxA5u9g5K#8o2~!pdQFInCNM*e!?&Vhgx|MBQ
zeH=r_^*O#JwcAv0kG`el@Q40o3L>{5@xV13)f~CLA-=sc0<HGIC8UAX5wu(>u@GN5
z8h%5j#QRkpft<#(K(2he^WxlAFX$L<cBVZk<@6z$fe}0S16{4$5t>vD6;iLXCf#G+
z^ER|7&*WMuDXT#fp@Vzd@DmQn{Dk-6Lr+#{-zTYxV>QSmDDGIoXh|IDS&T5joP1NM
zSig^!<>e4p(ktr>vO=dl@JH`FXXhXC%r>ui6+2AmhEg@(<+G-@tGs=hk&nDEu^|T5
zO-sUbI)0cjshmO@uSzezI^d1$iv6Qwit!zXUfcJq8nIE7lU>78?ilfWW>~yX0Y9Xe
zxLRQ{E7Q?t-Ekr;>E?$CYaH_vk<FL(Yd+Hu1qo$Ge8bN~B#iDgFPaBxoL3UH*;1Mb
zF902nHQ7fKUEb@fcQTy(`NXZMuG?}o)Z*F}oF^IFr`{ww`=ut7^VJT!N#1vRaGOI?
zp|)UR+-a`dWM5+nLK#ymyJ_uYYfTJDD;pFK$A`6@#=}r<V!ut3(iXSSvn)~GTv~K#
zZtYBHZrD`+GGDgp{=V=GA-kJdH8&g5s_eQRw$u|TMjE;Ao!7+`b#I(KLyS#_(XZix
z(I((ZrtjQ><6Y!GC>@YorkQ&ztU)+S9~M}2Jeij>roFC-y3s<Fqb=?76V@z}lx*<b
zF)%mdNF{FO9*o8VaI6?B3B7%mF$V12hVMyX_0yM5ASu#{uKO5X9e7qmbkm~X7_LSD
zUmGyqY-@BZW{<)UQ@Ah^;wL?2{CqswDWg%hl8VUN=iU-t!S2t~igvtP9zbJ$yX*{m
zrkJDPRr?z$;rs?A)W?X(QeWP<#0L*v)OdzT>@@T4k?Nwv#oMRbo?M&4KPijR3j=O-
zFRanG4l?{f%*LNbyS3nYHA*V1D+aDB=8F@(^d_a&Fhb=S5=Q<2n#?DJ#z6}iVNtA~
zN90?N%1vIJaz2*l*~1n@m@4iX`^u(ONh#5tI$6lF%baQMono@m?W_9Z8T9L1<Lwi|
zj4`ixc!u0QPD>>VIj4v_?Q4-y9%@PiRRWa7mWgMEK<0g~V#j4UBZepY?ACG-qm>4u
z&f`9;C#p+d<@de%^k}A2iy6`Sf!=B22@`|iUb7TXT4Dvqa{Oo=%DcdmnW3Xh(SWR7
zH>GeB_3JwPk~uPgJ&8+k;RLP1)2N=^F^YyAH~U;Wu0>u8G`{lgT9(YqGn%>H2!s6k
zRbYg2bjz>AvluB7mX&)xIPGe#<YJZ{&1+ZrF;?5!MCc*={-e*Xa2N^e(Y()8diAE3
ztIuaHf#-UZ(vu+%$!6yPf9(qIkxW?)`FSzj+M|>0eizqLG6ZiXArD>9^685?mqDHs
zbhAr&@%Hn<7wdPA1@xcQh%;9&Y-+vP$4x0(nsJ{fqD8R4nAOXDozZxOAc4k6=xOGZ
z#r{?nOZ&16!_qjvwpuJg70;Q%x!BKfFijmf-{)x}I%oD#G6;Y6NMBxgat$wX3^zxD
zKI&CWgX+@T<T*>VBv<yt{f|513)DM94wkJGe`+qtMywX?bXx7uP@G$)YAv<1G^_pW
z`?2WovfK46l_V)$E+fX7cg)ecX=j8<Lz>bDY$fwG=Y&5G8^|mld{ZuuOUij@%DnM(
zk(^EI^^(CRo4ET;3wfUXr9~x^g@X7t2L^)U&lkyLbVydtf371R37VpGepzS#=z`=^
z8>Nns_8MzrMNq(0x|Ug<y1FKm3`*FjCGIqbc-%Cs@;7fvWYD&^fMfB9xha_BT!ks#
z!g>~2*L01Rq&fr>BknA7U6bL2jy$%1w7|rR!{hu?s~LN<c4wAEJLSCOoL?!iY$nel
zl3pDNda!nLH&Z@)ESHHyXN^zQOtFNsiplxDlXM)K@}{0_<An4f3hew@p`&w8>Bi_L
z^6NM)o?WE(RSqfDu^}@e%{{wX64l*Z*HkGO(s9IjD8lhzYMB(a)Q6Gl%^@YYq9IJF
zM%kAHdPvR@2a>!oR<=pdH%`!#d~8ThuHzANC{p0|_4lJC%VLV5kKUA$%AOhRDx>!!
ziNIh9x%^TkLv%Q&=;hkw8#pzR(^wJD&vmCv(p;2G7G^hrLW?!GVbm8GpvaET6&w%c
z4k_$}b>XDF)N9wOlf;wg$Acm=*rqHREwD2tB<<sQ&aXUqJ(tK@0T&f~WN;o^ij5UE
zwozbpqk-K<DW}GLG(aPWFy}<c$&ks?52LM?4k67+pRm@a<oyy_&aX7MKlS6D?7Zuf
zcl>}rM@UoM905xwR!)igYr$k5&+7bT>kR5u&kHWd*T+-bKh^F39I`GR@v+wsKSilW
ziHQ-aX{(M{KFseI6(eB<U=p|M3T&^uO1iaJp=B_6tiUi9FX9chOo!AmJ@G!G<GaI2
zlaSedh>)8aY&RF~HCIJ`$l_YY$LFcV3?)`>r*6hA8xblik_qE&-!bxp<A_Tftx!Q+
z+0&$kCpRUZM$rm2UQSPoobfX(jyQaAEz*=!ad|HlGa*B{eGk7Z{(+jBLUZHYOjcFp
zNDEor#2oGUiV*qLl`CgPq+4seOqg!NsGL`Q2UJ6e_!8;)sb1%wDpF2966q~+I)q;I
zmVI<Jq0L*3TMhEF&+8J3O=rACyq2@}QVKoVFOxkqAGsKFE_BIX1SXX2C!%{&PZP&x
z_oVXB^F%4LLwUBDMho%!*d>MaQQWdKR3m+{psI2INNQVxw^RM_$I!lOdXIHm-9LF$
zc+YT^=`BAg!qG}N@x;_*SJqtnO>NEjrRzij3v&fD_4_SyLsYSu@(p^=1>!5>jZiDj
zw2c*tint9@1UHA0iwC`2<6aq43?kgKt3k_K&EITp!5Gn(o;Dj!e7WPXm%F`lrO9)u
z#$-^iIG+USjdU4&sOvZ%W6>&sI#BkV%d)C0{p}?(%1l_I5=rk00(;7z3hpOzQ`3HX
z?-oPwHMJzcR-bzgte4F>U@m=HUKA_)>h_zZ&Gu-noOFMvf+zas<-57l_4=pV%M}sM
zSBvo8W3l(a{EspN-=m~=*L~eQU5;I3xLlVpa_tQ}`d5V%czd2<5VR^};0G@{o}|f4
ze#V*(ia5y?D0>L6Wg{rf331i<?ceBMS4}%zyN5OF#_4d21mSdCQR>wTe2dTUO_1HU
zRlrv2-Sl43)Ml*v0eckp+{d{UU=evHPp>fjateEj02BEe8Jp<yLmvd?UlY(A;m4HE
zT7Zpm1IMoZeRR&QJ$tb=UhOa?9=XRd3w%eyyb!jRa!k)=iPqrH6sX+XIUW$Zu{>)j
zMkue4&RkK3t)&v8Y5^p75uFafe)ZfSAdNHgofQqYiR>7hD<y9Vwp&;JRe!Pq?dvhq
z0*CR?B0l-+ruM=wRSOltZq|HuuyH|Tq;y1G2(yoICl?emfnKTWKL$kv@Bs{sbHIZ`
zDW1nYisq`A9TYGRsgPpR&OiA$niF1@taYd%XP}mf$RR{prnGtd+8CqtEGfL_w4Vu{
zC^ozE7>`x_iP|%dKnA&svlT;h@jw<VqLpfP;(5eW!@YxPjOj<3^~3Xl@y}(+w{G<t
zlwGQJvi9nlY#gXnoI9c;ja@Y|r>6iQHsV$D>yzQvA8MbJIo>aUEvgVM!)ADT{B_>}
z8S%x(l-BBH`E)m@S1Tm<F2&Jboe<;c;kA4YS2$n;!d?9&*_6e2Igb?D<dkM}1|IK)
zV-bZq4;AFzs7ZMr))2<wm9TfY^-7UVTaMQa)?06T%9V`<#w)YcKJztkk9WC;kh1A2
z$q_`wn2^bH<U360kcBR4_)9Oucj(`Gx8G~e2}1Gq+ZuKT#qcV}b*Hx2-MjY=<oc1&
z!MIHBt3}#yx5w4e@|hQiFtvLJ8Rspx>5%H4B)ew4a=6-J$<#k$+2X#^fM?XFR1r_<
zJuXw18GBx{k?|)vo-`99v%^u$r84J)VJ6viAyQX@_SD!_EocrLKb7BE{Jg5oa&mvr
zCz=;GuR7kWu<N<g+wWXs@+s}*T9K~Y8ojh8o^9$JHmiOWoKNHmY|Zdh9$Q{H4R`Aa
znu*sVr+fb&V`m)|<=4IaFCir{bi>RL(jXz-4T5ynfHX*hw7>u(C7_}pA>9p12}%h{
zw+Kjgr}TSx)-RstUGE?7TCOF!hMD`m&)Mhfv-jt^W_9)P6s*Q`CHY**1dzevGnNI`
z`sXdi&H!fAMBUJ3AfoR>x<nl(&~4ffZJ{4W)`+W7sNU@0Rz3xOotzAhh^ot_K1Y_@
zkcV(VSSY;^o_Ms;UpOu%HX0~ioTT$7%1tv8&L}6LfsK`rSjQxDAvis7;*+bXJRi(@
z&`ijyD~4R@AztZ~EHmkVe21~iYLNu@QCV@6%G=0)B4+psnHSGNc&X5!9(qNnbW)|`
zif?Ttz6RxSp=HXvxbrI^ueuy@#Rq&0>8=G#C+RyDK{7HwSdCI?JL&PXWLk|!-=1y%
zV%lqSyL{n9?)#j8*mC}5^@qtWv%GJ!_4w*j{TbquMJXk;a7K@)`6iz&%(d0j4?LKN
z_$qmQ<86WMQuTK{qGcraf1QDHXS5Mhx-e?9>1b{6^G`7eqK&zt)fiC55};(1Wqhw(
zVb+K_?3-9*f}eK(HRksx>;of|cNJjXGW72dio4@?tk!<Z9yg+J&^E$d^^Hg|Og@q1
zJVni4UOic`(IZaPh@cHpZm>zZ<fSLU;;`?(J^l7)ci71UThq|RNc(`c8W%jEkJvX6
z%QBwcPJwm7IMEB@)2mycqwVICXSTAbq&IXfeT#FTf=aXFAn^Q_zLBGd(`|W`TLweU
zjxhStU|)us6%{3mKa8szMUWQK>fCRCwkxd5oE?+mG~Mm1S<5s1(4#x(q;c~T?BR^Y
z%W-h@gA+nZP*?JpOD(CEw1Z0WO`Z>h>~K2hIY>t65W@c3?o3~VxDdi+Zo=n?e!K{3
zni1VUw|rT|9->u&%JK}bk-?ch`>E7O`WTZjE=xU{QX%YETQw_<v__eoNM()-_ZZ?{
zEVP$6wRycdNRDmjFvRwJ`1>boza|v+a$bE;<mypVOd73d)jJoh;-5xK*7-k6alU+l
z4NS;8ZJyn46+)&wgP5M*Cnuh9;|T~ObHP#BqOq<N2_Vc7&e*}u`vKpfvRBMOSUz%?
z8$hjg7<*_q83-;@4v=`)S(c9srB-z`{V~v~^V*Y*s``2SjDr0rm3v2`l_c3j$D|Fr
z-+pR=OHN@5=HsWbSJ6F)ktenkq^6=vmhvs+o5gVZ0JSiVcpg!1qyZ|bou@3TDkuvR
z{XIzk2t~Ar$LS81@RLB;bGXvYVo-;%u_}1YHu<RY+YfM|y1gc#S&5HOTf=FBj&R+A
zDY<@mXE583U*d3@XH!^id2N9CZibO0VACC^_5G;^BOwtY8qKO-d#K}=X!xouA)}O;
zm~W6Tq=X5v=AzT|55}C|zcz>ofm1n*z~vrdCPkY5^hf1;vU#i=R7^gG!t{vf%Iaq(
zouYw+%gj&nZ-#b36;`2$!QSCWmW*@2<|z}9AcM?~SoOo;;hhjAK#gaA_b8>RL%N+f
z<C8%KHTE|f$&LYiPt->_yyf|7%U_(WZYFIeA>u)7NIKlowkqW8e#q<&_6}2yW3gHx
zvP%BKu7-?#<&)a|+3e+>yrhVD!MP-i0Gi^~dKy6i27;v1vlC~}y<Pf^nVRgfUm-(O
zyd`geHey>8@!7b4L0LZ?BZujOSZ;-0B!@5Jv$`ZUapZ*rAGZYr&F}C04JC=Mh%V$v
z7T{RyOUl!_;XVqCt8EvTk%wpTTiv2Q4#8rdQ;=$3qgc1Sj~S3=)50zr1E{DpK-~9P
zeo@Sr{H&$ePA{bMY&bC)|KWv2f*B#xcA`a`_atY`%9mz8{Z2b6`LxZL0!Zq{pC3=t
zF&Z3aH?<<^Pf3{5q=Y>pV`zRCV{*e~tCi^wUKXQMLvW+Y-d_j$_9BTf@-1|I5^{>$
z4;C;k^P<%*%`Z9#muerzaK7Qa%AWrGI^Js!I_v$T9-Ml=EUt?RlZV=0qS?2Z>k{XP
zjgYoiAYxCI>T-;y5OWEqTOU$gvW|QJ#<cJDep>B4eEhTAxCtQudrq&{wn4t3dXBhx
zlIQ+%MCVC9iQlG>R{nPifjszf4u$VPgX|O{YeIj`SWXRoVUWe`$Z$W>+(?@GTnZPH
z4~^`(%+us&Ki~2C70djS^Oi<PN5AAT2_CU4$~(N_g`UjT@6VM<6){okbyC3>ILwK;
zQAGlI`~@lpTtf4pHs5`nundiXew5Bgcq!#Ep>TV19lw@FtAn3(T0Bb2sb9hjvJ5;Q
zTjGVh!4LAAYpubTaa$e+Y5s(iJZ6cY0!y0|UX)k*+x#T7Ta1S2HaeP3I`5GQZ3B#-
z@U2llf#WyO2=nzn6D^6`+O|svtx;3q!V&UCG8;MMyGG+k`iXn&!x!5!X0bSO3MS#H
zjg7S_tF`FmYs8yUNp|r?f?h~C9PQyV&=d4}Ymi<Y9nLJ4K{GkO2XS4kb5YSaI=eSP
zv9+Y)374b|9J`|R^{Pv+|K~>-2DV(4sK+AGoC01?Gh$5X`tV4enQ1@<iGLR*cU?YW
zjr75zyOwN#95>(If!@@L;jp!$+MBoEZ>0>@?5<sUQYSJ3)iz@ExAm?26(5-83p;7K
zH5LxI2{78dlHWWax~Xh6w@#<FdzL6NtgjSZk0a;Mu4yPhf4+jRnv1FX-7bq(ks4YC
zEt=C(gt&LpD3F!Bhrf0Ey&7NRvt&a35#DXhw(&?QBaBi%|Ff^bVzy58l}?*6txuHs
zJths(B$-u#Z3yCQ{#&q5w<*GA{W;+Xrw?*g_awC?f~Mo^@N3NC3Kf`S;Kdsl%dv#F
zV7mV<#B?>{`XEz?1!y<q)ykyYApDqbL)I#0p5ins{^@^{z0~$7=E)0j9Uj>BKIkd3
z<;U>7r;U+|)9s?9sQhDh8yAgEG(S>hE8hFp*KT7hCrr$8S3OtGeR3lHo}#+cMMI*;
zvspc#5aSxM;%)Zo_wO{hyS)<C()qMWa-U-jEMVMMSo3lIt%qC5rt}m#07L3^(LsZK
z@+fi+9+jKs=QKyDTazbE8j}~wsBy0@Jf2zzQ9!`qTm@ngmGVb}`nEIHI<1GbapYH*
zgn<~^FN%7aEi)K}WfV!^@swxH@NK?F%7eXSc!nasiGWVXTXib<tY0tsa@-6D(*+#p
zemtBmHx2zcNviZSe#_8yG43A6iqI!RBz<dTa%qoh`tVdq2!V~pQfK@FQf2%{cwW%O
z15}IxzQ)uBM>Xo^cget01(VwP9KOhmQfWtjpd$Y2g$WsX)dN)#$RpfT`8=d0`&1SM
zRl}Q<&s(qvPDslQguUOV>#0887*z*79eN7&p|cz@&Q6>V9MGH9UF?qEA}z(Xc<>}s
za6}pQd@khD)n>OMPZ#+e6RQ1?;YxM^U(fsApFGvqVnONpQyV2ki5@65`8PBrKTS9_
zQ^mdfxaB_xKczn%>Hi?i;<r1K%%+{g_YkJX$X@kcTnZxhsi~cD<YcDNE9!BFS2cF|
z+EL`WJ0%aFT34wTJ#4g^Rb&rx&(J*uD)qHGNk-w0=s~d#toh#&I+EEG4QHTOtp!C}
z3)qO_C=z+hph!C8tyb9FzMCEMI9y>v)FCe>hQn!(@iz38z=RsxL!D;C5hONj5liK>
z>v*FwYSL#j7tp^!tBPp2DijJW*&LiXuc0IEDRj%pn&<LlkepO4%$z@zJ9-B%XS=<$
zW@`$?h<H_Nmcwx*6&ACz)!2~G)r7&vY?;}yywLjng$`_`fI>UWb4*;(21Cjv7hVa<
ztj3F1=~iT-Lr3m)7#_SFWof4G+cjs83QfexbiR5erU0fG_1&e>v$W__?8qk%S0;4A
zN+vO)E*j`I<RnUOngK(ZBkaT_7&@T!xIOJK#`}q$7rl^wRMN%El6(hL+~z|E<c09C
z^XbvBU6C~bs)fIBA0B`YCyHbqs<6~evenoud3fs@WUiU-Fz`W0BZGF*)N}39C~dsL
z1*agT1YSzvE38NMR9Jrs-8pBHV2yCmMKroy^Fa2kP5M1-EF&VmD2aIb2g_|qiPI8@
zPUsR%&n)5WrRqZ#SD3Lmu`lYW_c2}i{WTY95i(zHaVqw<)t<g+A_lS9%GK+fT#6It
z_v#kp<%aV5U5evYg39{V^gGHe1pv5y7=1EM0KYNRC-<FhSs5wS<&>|=S+K`KIwcSt
zstk{i%aQv&86ON&;{HtwZ?tR7IW&$J8@!ePYLZX<K^gy|+n|_e_U#GpfpQuB&WHhP
zcN%M?4>aNv4UW3j7GG8cCKWEe3lpLHqb)D^It7#W5Z`bwvhQUsb6;tS<>73bpCONV
z3sgj%+vIrtbQekpt4lomphI5EcRNCE|94jRjKwgVz0R^b>d_;d;!t2Svs|q6do;{E
zBUXG;79VaxkmUGIk9=&oSGR<tAStU~w|v;Ai#0n2y>8%MlIug8E<$bp$#Hw8ei6@Q
zL;Pa9XAr)L#d`x6oit`wTj?A!$s<<CH^ya*%S;N}Oe_lJYo*j}FIwF#aL3q^MI8Cx
zO<=FBEVu~8amvcAwJk^WL{5S8%7g>2VFU6sopP2ad&khQn<8`a;}9+;TWcF+_F{b@
zckMLt!C>M4h_J$-cZ4D`c7PpQ^6o%z!97p;4`|2nrm(kTQ4hBhhFfp6h!U<vJ8*a{
z8z5zIYOoy<!XXzk&<~=8wCi{LTX3YP^d0}UPXaodykSp;ndg=7I(n}tI8o6*@Tz57
z{t^~247Vp7TS}qj3FPj}59_R0+*paAlk5W=@qgTMAU&AwRIUJRiYw7}V2cYA_<Lsn
z`2QF>ImFo_{x1xG)4AQr$>M$H*}x0`>(AG<yA#eJH!k+_3oz)Ortj~NVQBzg{u`Y1
zKQC{&b0GSH^V&S}UoI5Fbs@hWf(|$jJ+>w~vlR@`2HgLSyY16~XS7nVj}0n}TSTQ~
z)i!xmu|{)2Ky?N*+CV)n{b@V(zpn&1pQL~*07~zB<6n(okBU-$=lAMNH6mY90=V!m
z8n|_7BWA*+|9w3?4ZL1S&_inQUv5jd^<P_ojat>@yIQ6&D_}NA`M9*d5la$H@7%23
z>8<vP03ePI=K}Y70Z9b}%F3{*&IEc>(2J*86q*SHX$hoK@g<4kV&_XznAyYCXd*_M
z!5A|3<d)NEXW~P%Bl*4~?A)Ftre=vKK!fz0AFai2U7qdnTG_jv?frQddUO8WG0TWl
zV$$$8C^~J+gRE&<oqF|4;2CBhIk!?A6(Nv1@XZ9S_EDejeEt)6DF99py~!+r>|P#c
z*`O<*MEXFJ>hoI;{U1Ax&6g4ujDPPynEgo+h+PlxKd6eC%fN`t@bj+`2JG=W<rnR{
zE7fbSRiW*;LDwgfBcQpy)aX1>mbBOvt3I5gXbS*2SU11p2(u$V8&H!e+@`E6Ki?4@
zOamZ*H}G`(WupHTXwHYPZD9G^zl~WCg{aV}XwIJQ{ow_`mfuDaC9lOBGIkxR$<_W3
zhJfu@-EWN|p#4$jI4ZXz0FZ>kgYS&$e_DfC<fHI;B>((Ue8ni8!f9AR<t87#`7mH)
zW3TO&Czao}<`14I_)G0cQ$Tj0PNAkNWiDX0-hm5h8e=>i1>h7xs*|f~GINsW%8%al
z;anC_BIh|d<L9{mpOA0$ExbDyhBB>lIN*=^W{(BA3%!qB`18epTl<H%B3d`M>Ig#T
zcBgC<1xTmVXJ$GGE|Ms$?e@lWq(H-2VQi)MwX*N-jH5(PQdOyTZd4H9=U`YP_0ERW
zV%`p@Nx06`&TO-x)ZclG%Om1`@+56vUeLb{nEveM7Dq2v5psP&NcoE#QRt?SHw$uR
z7SOn5aC;9ZFSRiAd)sI7^ItADOBqi5wx=H6v3WaZnc{w$P)epIfB5#z-z6e;<C;7=
z$pFEz1vuOXR82r*I1QDV@^g^Wtf9(fx~>=1DdDsEhFLRxY9-JghWj3O9j`i14*f>P
z>T-$h@JDIV6{3jF1`~vE>_0t0m>`6ZcRxe=f^cwKg3uvK5M>AT6s{yZat^9cKjv+S
zwl`p2AQ3nF9YnU173MID+$t~b_FVvZ*8ltn2pP4R1`qmYIHgq`V(SG=3w^={6=ud1
zTt@Q;s~3Rmh;xvfO(eXyLovp(r0+M6<g3#bFRo&VVsF#~gbPw>qmk%Kqg_0|%Yy~C
zhcOPPJ#ys$^k;sv`F;Rc;25DVitOp`eFHMNc_|RQXQ>B-;J^QX1$6DhR=al63q#y_
z7tZvq$f%+R3b7k19Lnv4lM88pBb#l&wl5V`yXSqUuPVq2tJApZV5*B+mo^JM+cj6s
zlxT#i{RqICj#ZkPZ4EX}{6sAs5~P|X75v^tE{3^6In5zQ;bq)`KhOwiu9O3)tuIaJ
zCp%tk6{lWNqRULfaLCSR;gc0`M<Xr%Ua|o-&qR&}hW$O>^LFEZ!z@Th6K~h7)DFkQ
zjngySq7E8B(km>1fLQ4l{v$XQJ@FN=o=e@*I>hB`>*>-**H+<d{3=Rc9_=bk$yyke
z==IKVUl;=bQ*!n%X!$EC<fV%0*f;xZozCb%8+pwdthOtwkPOS~0Lx`ZDIlpZM?vD(
z%E!ouCho4_^}y$MskHYLuw?mqAFPpqu%H&v9<V6iJI9(oR)xN}v)UN4_T*rmkCOpw
z*1a|V?0V-(O|XjDwvBOZLpcHB=-MI-h{9r%`jP`b1?r74^&=qz#OJ`XXO1%s)mMi<
zc~5U(OwcTMoK`96CvE<R%c@NJWR6j*QGxDJ3<Hj%{6A8wBK{Tuv=pVnmi{MlT|f{V
zyVgv;BL#Zh4YKM{9pED|7yGyzJePZ5V6jWF?uz*py-|YWI9d=NM=LzgK!i@nm0mxD
z%xDEzo+$c3AuTf57}@{+p*-Lrt))K@PRfpi9QHoY0o<YN4`!O1%dN_`sD|D0&vX2F
z#5%S<@=k&y**@R!uf+zUE*E2Yx7X&tQStO~sX;kDB9Hd;=hgYqO1$tKPS`)mKRMtY
zvNu=fwwq(c$$-0oOxpAZ9<L+H7<F2Pp{#{~rc`*lQQhPjl_7dTd@b`&S{Ii^gty>N
z!K2pb^xg5&caJkAeo0ZxM1XXZ*ZR8Oo<Pd~P{BRrX`yC@oY|{D_tu4vvJr9$l8Zim
z=T_!X_|(?!-{!Kwl7<s<1uS*Ho`DfMzoGF!wyFaJ-ET6A2`onLuYlibV9RqM+`sf_
zIOqy5H{PhF2O8uk;9g4fwO3et2|!<n!0HwD-8Nd<V^WIa!^AL&z#`a=B<@>8U$b+h
zUdAUOJdSpdm4`*S{HlqBC{0{K7opFGKv@YT*+VkRHvqV5YD+mWgG!6pjWpR#7`cui
zTGxAb%}jkw(ao>q2<%jhB+Oz=$0(KBAstQ7dq7oPZr2WOdyddVVz(wLXA5x?z&_ID
z|4JLp?yniv&PvpQcvV|>k(5?>J;K7#NvAGkVJfL!9mu#8_0Qk?4p6;H;Q@06`D6NJ
zULENUCj_pzp7Nut2!}bhiPi%Nh0bi)be%!DN%tR^+-J~9@n)O%-=CKae8Yq^JKLR=
zrx!a70EN~#=b4p=z`j4R^M2PCCw#f&9oD^PfZ&Ix0Km5MaCH<t7W`mU{=i-y{o7*z
z-oH1@@*=`R7x&%fgZp{QV;Z+3ad#{~M4E#P2%k8-;|yj4x-n19qBpBhA`#wlotk@d
zU*aeG+bc9P#P|bGT>{%s=n-$i1V^oL&Y12&`pr7gb+Q4Et2938)_TsWphuCjpJ=_V
zG9}9fPkStkqfF+5q7$uoAM3JyM(h)f!{IeX*%{05`8-{ECc-28kUi7YwF0NPE0&Tr
zoPc0B9MLPdMeV!Sa+Gjgt>RH117t~SYV}Uzan6Z&FUJh~X>xG(VEt4{_DwMKLqIn3
z!yBQ=6g*}Qp+raD(_~9b{3Gt=aFyq{{M}y|skTv$ZYR9N;3rHrQhv6GSj8)tc;lil
z4$Bw|Dxh0E1dAlK<4i&cxlO3)U|NS)JBjbak53|d1jGRqkJ4?Uixg(7ENp+gc?hTM
zU%K6G-oy8ZJG$RLoCrZCuAv?asoQVc1TlVW{E2;L)!(0#3waKSb-7c{%Y)e)IuXNk
z@^8|J7xo};m00R-R{83N1I>pdD+#0Mk5WAi5Yb1Q$Fe7h7Ksl4?jZpG2wg~=vnTfz
z3G53>m<DuuJR6&!N<By%rn6p!K$7mA=d!4O3%?i7#*uCuCGZQP5xFiXn75b1TPhR&
zRG=_Q?u|_1hSy-9cGx<0l3Jph!|EH^pm=|{=l!+X4J=$_Z6ElR9*&*|UJ7ZP05<IV
z$MbtS&4vR;qb>_Yjy>4#PHS<rA()Y}-W^6hky_kl2bsn-QNB^;V8^xH7|B=iOOytA
z-J=rx2=WQ_LMkSW=R4E&gSgFb#TY72BH~EHN(&)4flo(>3>q1VgI+ZHERiDyM^n)2
z05j+dq66#vc=SoU=;q=BD#{5ov+7D{S6>bn7s5Um>$x>!<2000+;7*#EWx24Y?0KV
z%0GBclQk#1{QX4E27Z_h;-1M&ZRGH&P-E#n0j^{p+P-%z&Tj_Yl^Fg<MWHi)ml?MR
zt;DH7jqcX{tsh=<vhx2VnWHS^z4WffIkpoEN))M(=sAn-s4ppaB1V6-k@N#P@_|=>
ztF;jn$@i1(fer+w@J`CBpDW3o5g^riAW6Z8Jx$>0xHLf6TgWJg(u=suSa+fjq33|k
z{9Jgp_?TZ|4&;G$DeqDI5pi;2WzQhtVmri@3tYlSlOW2|#iQiV9Z)m^S>S+r)X20w
zgq=slO!}0anK);E2r8L07n?)M7Lcur1kao+l6AYUi?(cyVNt4wS+K_}?spoAbG*UN
zRDnXt8e+#jkx;^oNuvj|&;TC~(mYT3p^d1c{i=6;>|Ke0i9t=UaRW-VZ_bF9B_M;G
zv*4zll%FT(wV3&JC~+nxvP1+5JfGv(WE7A&j9w=eDqphYqiF)kuKEcqZRlZD#P{rZ
zb{BaVgAL#p%6cUUPN3>0H9D};dmwk0IIDrgIe&4j<2oHI&sn_C=qKRt@&FuO`hk6i
z=2xyTI(?sdpp<RdR1xfH6?7b#%Ex0S$5Rj?@r6)|e7KLUs7xU$R~kv5^BK~$QU+t8
z)KM-x0`=)3Y)lyQ>u0MfWmPFu!x}226OkP5UmDeTT9uHQ#_%YXZ3s1r%tlZzN=O+)
zjP{j(Ih31tp^$dKG~$J<9wpwB^-umscJl5y3&9FRX|_ox#uYf?&YgLpk>kIn%G(JY
zv)R@^nt~I<&PH7m1YHN|v75<R4tD--;~0@w+QOxJU0W!e@k?GVL^RtsKnzY_3Ml_=
z<YzUD5J1Dmr-E#X`kGA1+A-w}r6;=v6AoA*t)hc`-x1~2JY#!~RvlxfdFkt8(E=O^
z=U0H!L}269x%VyWK0lH^WSDANw5wH#j9S=1M2R(L8szWG8?Orw(=HSRfIjHOop8)-
zsk0QU5x7u(-1d6}ht9u0xx!Yh$$e4XBkWbEE7?wx5vO4&o%?AjsBx*4i$&^4|1&~B
z(w|=7P~mjZK_pFOKNQKzcZ_xg=9}}ke^69jwo^kC5KjW)kX4RPC=$U!4+c?B1<-wQ
z*<%Ga`J>=Yz*M2U{GpzvJof;p?~Q&Yqa>@gfmMuA9YbcMqIP8L(SS7^tin67w%-V9
zTzOFK9EDT+c?qHPQVJ7**QD#C)*sK(fen=lN5zolsZ(3X_4aUP1Ge=Dm+`@Z?w)xI
zBj}zqb;i?9-aJpYEFAq-ZqqK}u}V)ffQNLb%OrYv=B>XBj+E0(>h)C}mn!T^b@VqP
z&i^HJqT2YhyZ<oOgzUw)Up@|C`y?VpY~s0D05{=%PY~W;%x;33LP>GdrwGs}>|g&k
z3$QV(2x_I=9jrmzzdTnyJvm)ph!_DK>N^S+81dwpLVl3{V9ISvW8av=H~uwOu6YxB
z&=dCM>1<5?;Yo{a^6H8ADbIc!sCo5VGOixw{f3;y(N0({K2Tw<r+pN-&}z0}wR6aF
zxbhw`lINq$ZanAPdQ@L(n5UH5uIXmkwpG<}W-?yMQ@<(vcIF(f;n2b2C=~rQ`7O&)
zW`-~WZwPwt+Xt0`2F~Gax%JLw%D@5r>j0N_a-L3|o`11HVVFWJgCyope}T4?4a;df
zvdt6;TMqU!W1nKJa$gkF`x#@*3O5xKGT+^81`Boc5S4$d|GCF7lt>ekbAQzi%o%GA
z(@$l@s?k^*;nvtqaQ3{H`cuZ7os0g71R_?9_M;2w0<%yqsD2-TBN9a>FU_WdcqX<p
z%(2bK&_hfH8|n{gkQxve4c;K@8%iNMgoNt|_bBJ>Y(_b>QE;0~wGnDe?fk0!&i3hr
zoyKo`S9}m^WCX+ZquIO)2Z;W@;=$+ij;$iE2=ukUN!d{$EE`N8_l$FVL9%Q<<#t4Q
z-9HLX;j8N}t3L4C#(+hllj>dW7f0L3Bl3Rs+zCB%=T0-DcEalqx}gU`r%jr+6NAci
zEZv14C<0f#s9=6cc(HFhuRcxFEo`u{kP{b+bP$8Ho6FmukV+L*zvx$~?~IL_b@rL-
zf_u+E{RN(Rmlp}Fkzz#47_~ps8XTyx?Vw@TEl#W3JTCLR9K+3hx{&V1eEUE#Z*5-t
z8QMX?zx?CMEbIFcU&?=fQXk5CWRI9E_7CpOSFjnMKG`ulh@EJ2GZSc)3SMlugRWQy
zjk!E-Z}*VMmzf!%E*5dCKBX!dO8qQPTEm}}YgbuHIF3Qiw`LeeiYH~3*fJ!=Km9n2
z^cp|lIU^9I1Aa;JQ?|XCx*Qw}QAb|}Z#@je1WO<6G@ra(w3WA0>d$zn@9Q$le66f0
zI0UlmN?3bg0wa6wo0freq2-5s6^CmVQOrQ9yg9Jo!#rPzsijJ`fpmbFU8D<0(kr(Q
zaGf9fsEkGwfesq0W;z?U>8Co<a3kQo^rs!FL80y61ZyX}eSk=?5KvZIu*loZ(cw;`
zhy+PW2p{E9s;!E#;)*WN3%w#8l#3~#Fi&J+L$}JL)q(T}!{Q;uWq*}P;rEHF^K|h&
zEP-;2ZX8v+1zA~D18ane!favzUNtV*9N%2b2<hc24igCG^~hmnFLXdgn0&%sfpd=k
zY@hwWLYz#W;ZOd&rp*&aAjYt|mL&G2Vwx}%RtViBb5J0WOSGPI&@VL*(kbc>Cm<nk
zF8*#y@w(wVIU;&pR^j7DRFPJ?Yjiu21FBA;eiq!~RHbYQf2&)c4yhcebA(;I))?sM
zb97pjFrB);I*C)*7TOl~J71Rt)r2vIh3F&NelKrZ-b0$ZeGZ&Nc2^G9Mz24-;B4PL
z>}p%y-p~ckcfL$-Poz)$i=B#}U=qu{BzZufJCz~nqr)1{0H3TdOAc35>q&QfZF{(_
zSQEO(bG~*XDe80eo=!4IW$u=(t<K?(zIA8Z3Efz$`ERv+Cm&|7Pl?tT?YrM1LZ9vp
z?2PQj&MDnGn1IQUqAONk#2XwHPp-m^^KO8frss}C{k_V#e3fOtGST0*GT$nx^U?jG
z{{wY{bGt*4+*howV!-r`@jiU}*`FqMVNiFsxi**Tvy?IM-R7yb{+<*%zV_-4TAI+<
zVzDcm(#fA#>!#XIt<mU6(`mBSH$L9MUpi;kEvz5hs<_yG<Tz&a>t>0Syqesyq@<`U
zc}0qp{^h+-@g_crLHDvUw9DM2JhqGKiVXEBDrQTN=0QuwM|PTT->Os6EV1jg)CJAe
z)gQz<)DHb>@Xar~2+0l__A82^dc5vF17vM)2sO4;ix1!09?TerSC)>uva$RyNN)Cf
z_Oq<`HGWv^+9QYIiENy!qisiOX_psthD(qC3x)MR`?6i57QZ(={`1a(tj6t(&|hFw
ze5COVn#=3ub(%i3Dhhgys+sV&bbtLnR}YqqyE4DywMZ2DNvS^als!*(FDw~u>r}X9
zJL?YdHPn~>5*=6F=CgLnU92aV<-H+bgvZ$p@ttCv&o#Z}ou&<~S98H>mJ|<39@e${
zRoS(6%74#Ob+T1HPpewT^YH8s<|MsT!xR42*T%TNvARQu;y=`$Xrv3KhTd8ox*wDI
zGK4R=GSBm3mSNB4S{I7W4kuMbq<=;GyZR~M#3`2z$%L>^32(seLaTY&Yn}ehj_3J{
zGpK-;>?ju*qzgMN>gW)HhU1Y>J8&$=2%=K(iSsel?zeB35W7U!1(#F{p)5KsHn5)9
zIu^B0e9s06`%yn3M}@*y=p=)mc`KwcyaR3a9`-(hYa0tT<oxD2beAIItI_V<7iSz!
z1#6hKhOmO`g8V-N{_=Gh&mcPBV94K!gbMuZ1P$k(cLLa+hbjz5Vlp|_*D#!ZF0CN+
zE-?vWU#okqVm?&PVfNOcpi(h<Bi4<#fU1^lN2E8&j^xnrp2kz8Tu6fU;qt@fJfU`i
zBcOsTG56k-BCf~DjusO7DUZ_1Ghm`28Wbw@-(u_87%lu=I-eu05Mn(j+lOdf^^s#{
zVXkQo8sf#XdY@~Ql<8d2O}j|@!4epBOo`v$XRCbXTDiDep=he+;rBoW11($}Ggjt>
z%nQs15axz^M7z7Y24*qp*$uj8c=pn<M+0Ye$*Qcd#MIvD+Y@ta{>ts(J;|#TyO)qE
z6?FJEm#67d9Rs<qbclBx8Ya_CAwvF_ad|<JF(dg$_PwL!^yANlkhw8(u~p6+Mj5vH
z^=Uqi<H_ggN;rtmA>#m<@hncZw0HyaqM#9W`50iQTQXW@3<;PO`;=#=IiOdiav+lh
z3~N;0yyq`k8_bY;X~%6|sbC@pSTgP>AtkzvnEmFBsUvynm3gD;C8qhz+$L=iia5&%
zfx-l&cXxFw2ADgFsvS$A3^@W15=PR=KNw9RGP!>@9|#mA(5mKh>J<!R20O0iJ>=S&
zZt`8J;b6k{AMNR_6|pV}D83>Swg@WuQ%7FO-g>cT?+P9Ndl=bf^y7=}ZT#Br$_M8O
zAASrn&TnY4{M64gIiab3D#*~?{%k32#Yr<{d3jOOxMqdf^28<2?$hSH9mMyqTZ8PE
zrV&2!NSvsG)$1)c|026{_#2q6o#)AZ4%p)$I^L6el1k2c$X}`~#xRUm40BJ=Zb%(E
zT9@ct3kb&Mz+;r}XTlbGHlC0`FVzz$W#6S_(i}v5^lW(GEx&!Q8mSVYwzD34b4*(Z
zmnj6Y9r(%B%-J);B!DC5H!wHs@rv9=zl^Lctfs?vY&#_9B5@%62Ed`O+CH0&u4Ya9
zmG>#Qza^g@t=+E2kUR^PoO!dZU)-nGx*1J@z}AjCFt}$f^HJ{9$-Y(tBk)J*YFHpl
z@fE}LlTnI_9Df1U56cC(;!D@{$<iV4pfvb4oUT|x3&)584Jlwhm^rKU8-LhX#eZDM
zy?vnJ6J<rDjLe0NdpjWk<B;umdYFlOB9}wDl_nEbPdc5b=X)~WL{i9u3nj|+#5Az$
zKs_HcoM#(QIYl9;)TORZ5>+q!-jnzO7)5wYKLUAtc*u`^-;xvr{2SHs*BlcI>+4jT
z%~gba9hO)TzwOIk^D&b=O`=exoQbG6asJCC(N1`l9m~CJq#{ph(SUX9i7iSo(o$!i
z%Czmyox}T0lQg<uH5?_?M_D{a1tNnBB~ObQiWqy8z#!z4B^4=N7PS93m`3cB^bfXI
zzK4Tz(qQBZ$lr&U3RBlq1BVp9nXMA4)XR~x$}B-Qm&Zvg17*E78z3*IqmEi{OhnR;
z;F(T-7FLKb{}L2tW+KQ3gqWrT^jm}OFm~uLw55+et;%_AB3^5d(=Du>Za80Y3Y>K{
z86>Sy0Zv+9=n8Lg3^G`|9_>o^Cb^@~>4cY@J4$>wDMsT|Z5)Ar{8K~zEZ8DmRy@^u
z{tZGL=A-5HqH1eyQn^<kTIQ$!(FOT<g<kkQ^K5IDa{-(d5sxM9cNW*Uo+Kj$o+oMF
ztobN>jr_8$^NofO&=QxqIQBaC9bXFHU!lESK;BXHkghr!M^*R8S7(v9{n>I}DwjPm
z8g7cfyR!Uob;^-QufpEiKUzQ1g=k|Z>DsO+iXLJqWcpFso75ih{ZzxB0qnT+#o8)C
z!c)2(>FCgev}Z(nfAop%!ko$Fm+zfr?(&ptR2-E`a8{fSMDETumggaS<YyJfa>VQW
zC~xP+n#?5vu&rIyreGta#DO9<-9p>@FVZ-2YU;i06I#cJaKyPib#J}K*g7+v*V>9=
z_`7bie6{-h{=S&wQX}?pj4^e;bl&y*PEkpUu#au#a3(=l!mQIXxT13O&sDmvOne4P
zYWTA%CtU|8LRH5bfklx~WAgY4JJ0}Z3~gm@lR6ycdZ;y|II@VxRb~V>PR0a<zWAW|
z5fOeM<HQV4fqaSdxcckC0lF>={3u!l8N^3+CE%Grkmcx@_V?R#y|ZXNNqz4zhI6<=
z*wru*3W{R;(h_z&1v&*Eor&`cEZ?2t=eaud3&)A~NBQzqW0ikXcAu2{1k<lIYxHcf
z2qwihdsSyU#yLuqCl4y1{95#~3IX-cX47~h#^k1JSeb+n(Sy)q=qphCqk1YfC5)q5
z&XiWY-Qu;T`7xma6+W8wib~i|#+!EeozxB)Z)X4~LaP-i7cmZINw3uwj2n+hMPMrB
z(I~J+iO#y#2f{|HSj%N&55i#<2hza=k_y6bC*7=^IMF51ht{2ZV+ozLP{CmoCh<Ge
zKSG(!r&8N6v1=!LT4^Fq#$cza8*%#S^VfG#Qbl*Sec1CpWt~v|YKAZ@(-S@1U6tef
zv|C8wQ(BF>*gAd*$T@}Kk@H%SLjnwHg7G)YBk*nO?+Ll^<bZv9%7NnBZf(cB4@8-d
zE*S@bvzWHz(xRn(QLV0SnuK4m+LlYd#)G&>`Vq@5Lr!N@RiOJV!-<yjHyPyX*Xo8G
z`mH3h4Ze|Lg5^KkF$r);STvm=))4(fq5Bgqxdnu`r5j~CQaa9o1T+I+t)j-NX!O74
zsCU+hPI3EHn0Z-mAe4hqY0;z-am9)>o{~kuG-{vzs-%`qOq3L_XA#VKF(g~Ug2ha_
zSuW{+l}Pg&+tn(}-^LQQjh^E65qw9$L9l)-@HuNYi$31?mP5$i?4uu3BY*w%b{2P`
zDPRLVt<|A^;paaFwj2fvZ>FPTgy+JlpU!d32?S5Ghur*Kh_L!YIwE$Eut}J}rCfi)
zJzE#e#5!4SCMPcJ_EAdy`f<f?(+u)!K9=oip$d%?y|K)UU{@)Owp9b-mmDM{OvM53
zSlKZoA76KiwC{e2=~h^VjU>Pfh!=2)@rN(2e(A2>KPbW#*PRLfZMo#9oc|}bDpswT
z+c~}l6C#Tw)5pi5?jgg8L@^{vCEkZ{I5d5H@<r%`r~DCZQ;gh;d4{}XI0s)2mRt#J
z5UP==6pJG*NHH`C9c{a>{j!KMdJA?6S+M6ORznrYkujZ8){r{B%_i9X2Amp1g-9n0
z0ZTo^tzv_9e_I=fB#g+Mgz)48eT}4|^|}K0d1qAYyPM_23XM3OC>Hx<__kOKjpj<*
zDDbP!tZ;Y|T|_W023ooYqsm%5SMa<dI)&Dn@2SQ&GEEcP>xdu0zI>;X`m2sHP&&a)
zMI4}V{uYLbeBnb3LD-vLveeddp|9dK8*mWRJbSA%R_7CF@=*uhknLBU%DBdT(A9iX
z!oIpOk@ARMr0drGTt-n(sM6IQZ0>uSc@M3j{4S?&4lswUcZYzcM8F__e6ArN)p>U#
zGK2MF9gZ|N3&pHq{h9n!z0XefMp1V=;h>Tl3s#z$i^b>UsDy(;6Y`jvr<_`UolACs
z(KoxvvBMiN*8m_vdi=9!B=`iQ21^HDgQ>=Du$S@td0<gp|ICa7yo}h_K?e;Mb*zJr
z!=dm7=1DAvofP0P9$K3~E%t(<;`cY=7X_>FWM+LD>Z6@Uf(S}(6K~C5ijr*+m%hWx
z+O4w4<zKJA7r%<;EY}&1{(Je)P3FE(+K^@bcZT_ApK085fvF@%!`Vlk%wiw8%wLnP
zU+Y%{5ryM_M}ZR`jYhfY>(|p#_a1CllzNR<r!r||*1pN5h$uzaZ_Z#Dc4(>(DlJ=<
z)NK^KRbx?l9qajaN6)qE!xfLlRO%wd%uRs`br1%LSDWrcPzI<%&Pk>=LvS$Af`cC|
z)lKn9DDjY^%Y6;UT^~gZpHQ{EyBn>OyU_{`WEJ>C2S!UgPYte5tS#d!w3Qfr2CXP3
zPpgtA#Z+$QAM5^QK3-Lt>*S%a(8Iyq{wz8sDD9Iw%PmIcD?`5oE;%OjrRsoRavWxL
zc_lt)ULRk9B?G>0GFCoNoG#z}IJ><|y_!!>vVQvHgZt5ah3iOtkU;9{En9dC4I-`s
zw5vx}ijthj!cbrJibs>1)tr%Rn>%x1Y4saDagTAuxvXB;+pFcJaeJ@NM67L&^@LsN
zaqu~ngSUa{IIpD;EDJqn&euya$>eAvEUrWqj5?*0vuj7`Mvayhi7LEKO%q1GV+S?X
zuUu+;kMy@X(uZH54afcRTIJuy#pDW^vlw5MFB~H+q4@a9&+nO0ja5pUaXX<)cp%Z!
zkHtP|pmx6hZLBFNt%MQc8(ON;Gaz{}w8op&Xv2kk;56%BY?AQz)~{86%8}oDzUSZ#
zmp@*;w8mdEc(O-w7H{^Wg{E2M@iW^3)n^5#lb$!o^J)QN#F>r%&)-OyJmMkcUod_0
ze)PwR?-i$+SGwe6aDqtDyfG)5sF~ttL{=_8Fz^W9M}zHm;DBZzqG(0&kqQ*J8%{ex
zbl7St^GygDgkCn=$W6_*gsFVGeftWRkoYNWPBq^UHyLQkJlN7_rh@N7G>~CM`QLI`
zv3@(CX<Il1XjgdF%T$8&<S!%#gMb;j9rK2*|K26+10TfoQ}C$RX!}5agsyr$j<}%y
zdwx5S$+~9m8#-wWZ4Z=%dJ3GSGXIqtxf0C-tP#gA!B>Cx2q3<-DP{NJ)E@LBYG3rq
zg;BitJ=|mb@_IB#`ldR$%zMId&QsZe<?l!$-`~d)i?e>!5Ed0v<y@R5A;afX+|u!&
z?_+1ljg|Y+%@f?uR*@MoSV6_`t$-rJW|Qd$@NabvqSq3`X4W}>v|0(bB4#4Lb}~>Y
z$Cw*Dj(3J;8u;vn2R>2R0#xYEbW@V^TIw|KQXEm9Z0>2sBWSYvo8pfnIpW185gzB?
ziDBDmV<nGS)ZRV#%rG}r*-T3h)DBq$zwks~HU*(>1iJdiZYbDvxTb1twxr1r_FLjy
zTcqe>=tZIvg-;l7aGjd7m-6vpP)wJIucte|akYJ@ifp2>9`Yl>rmP7zWw`GjEmkP-
zIQo;HF7GaI4Z0#Yx3MT4g4JXhRm?Z`GStyUH$*krxZ+;#;RIy-EDcGb>W?VsP(gv^
zY1}|;`|T^;tYK2-FFl{o;OtG`Y}%bOno50kYW(2BiK^DEj_9-F$8V*+Nt~=<t)1=e
zm~M)bzH&}V72j_O{Cu08$I+x#YHPpyrF(MW@t?LpQ$>#Myf?+8?|mvd1l$Ds8bFyS
zUp;FWS`q$1U_=u35Y}Y<{UzzDZEC=i=M#=shx=vXefM$J2>$Z0=$XGd$oO{tcmD>%
zPcj9;fCdU$9?7N17a9;56=52w`P-aFlxdVb5U{*F!l4s!f8o5Bok^JH*%?PY`iO$H
z6oRgu_#z*#8CbPI#4NY4({}XAjFOsscWBnzIFAY3k0+T9Df_FA>9_i47eRmA!-k>!
z@{{OUhGbyMR%{LfXJ^JlDxHvD{YC4v*S_dxq4oUWSTakSZ!`WDm8d5(0fWT*55kI_
zG8n9aF6~!CL29(Xa>x3#=t<nUn23lwMhWm&Q|@#wKcxl(4<XK1%%YAc(wmi<LW!Cc
z;)*Nm^@3{$LB|Ph?T?z)F6G_7aL~Z;g5iXq6RUjHiJS$DR-@PF*RM%qnG}5H8Sx$g
z4}jV3cig5*C!4|LACQF0z}97j@VA2lQf>#}JGtbXdNgTG6!89r-~K8%9?~>Ajv79h
zl7UL9$w50x)FVX(<utcUE5PM#9RO5OyGQFKbCs5DGM{tAzeGo4-}WUVGuqmu{amrz
zKxgRiijYA<C136BV!|e9h^|JN9If`}0{<|tTyC#4&B*Qn6X{nt2+Fy7M#;J?Ce9;r
z4w^aTp5TAo2?n?^PpW_2%>UlmJ>n{F#OKi4kQBvfk<c3n{UWWfxz^yH=HFqlf{3%@
z4PCVoCv``8^*_p(X$A!ybjN8Q<rIDmWQaSSZvPqs7ElnkYtV~qbe7D6BU3D<WP@a1
zY+yGzWr=u%hd%9XC-eet84`n}fnhju`1?;X#Drk&mEa+*TGjVD!*!>AF>4vW0nax{
zNd{@2m;H_sta1PK$nUN)>?3BG#uVb-YgP@ou&{8Nbf}b&>-1P&Rp9<STE6P*C^4T+
z1<=(J4!k5r09@ktXE$BId5DmEe@~!?&AkeQUI4PDf6}qNLiIIko%@2^O(Sb_oww0W
zZ522pQaE{av@DjGXb9N#&A*u{HPkok9>^#LN;kT&2GG^}ks&UeUBOo&WWUa|J=fxI
zqcT4GCD1Q%RSp#*;k*4F(Z4G3XQF}_Vec%?u|<ro{q6-$b5ZkIjpx7L1loOOtN8X^
zP|$iK>Sd=H78!@f(@w}<*AY#$HVASuxx3nOtQ508K$HmTx1gA?nf-w{YJs#MEchxO
z8Qa&x$GqF_yq$DT8`-P0-9_5DA{k_kZAD4;6;%;P=b?{ig(dJsqBEvNNO1|ocYVqn
zINXIOzvD4yGpsO^VC?At=6l+$LFaO|A90)GX@w8Pfg7O;=#ap?v4k0b1=LDFc<Al5
z@Sd_B9eg`yvEpZ1<O^34kQaQo<R~Ti_eiMHL7!mT9vZ^ye+GS%SW$x83}WJFa2S_3
zo<XaPFynDj0S%K9eyy*Hvu}0Cb-=~aD`^KHzM83P9~>Oi@*4On%P1A}ajM#elt%w}
z`<Fmsm1(mtcdXD@$mobQ><rp4H1lm>V88_t%}V~95|cXZ#wuf7CH7eFOO^AVR>^FC
z%x)k|Y0yS?a|q(3RgGF=p9=%-5c)YM(+br1o(BVuF#Mh2N9T<=;vZ%Vz~n+DgEE3*
zuj@O-|Cn1w9B9s<ETa38^ua2Q*pnwufGa^-*~C9+;XHuqoarJX_F5$rOpsAv$eD*O
zKs=)TKJw6aTDoIME9=GrBJ*`_t_=hc%_wt`&0q%5wTA32z^3^VfE-G&RV9VXIPut`
zM-)fkOVz0G^4DhVqdicsK#B!s6KG7q$e#lWY<YKA6Led1g`6iaH$KIZneiz#Jse5M
zU4$Sg$^FbHxG-H_zrYdyJ!5G67KF(EM(taYDkb27fJGb~1xeH{NYh06s005Z#vgCk
zNtN?9n%kv9Z_;(;HS*u)w9>9tgG#TbyE_hiyF{<JeNZ8fhM9f@L3|yZ<_E4-VLaQE
zN9RQnUKxSHu|_ICOJddUdgpi9+L#rbCUtCGFG-_;D;g~aiN=&H54;P|K+VSh`Z^3=
z8_2McnwE)|&cg*jPR8^nSK7^5*8_B7-%kytHLNg*5H6}JHtmw9l;Ynm9a5!*KEb-3
zxcHK-0WhZ!Okj+pfZI&Wq~xx~>D202KVRi)f`J<pRcE;5vCw`m;p-^;=J*WC^2x40
zjjIki0!2?MXm%KeCs+v>PB}V$O$=>#LhHbRMtFhK42Ckfw3O2lXsb!plHm8U*}H+z
zNnSGYqJXsMZFF}2y<Ah?<-8P&7PmiA6|R2kLohip&;Oq}pTe^Z(TG~b1psMw;MuN6
z2}jQCP`&<E5DhUncTB>2dCZ&09E1<3M~p1L1SkTr2d)kgQT;M>F_ADfEdUe-H3k-H
zL7Zy?a;1ZL+nsSl6Sf2Z-S4xe!&>c=lfo*axPsYV#S=qfQ4)EzPVr0-g6Ka>7T!Zb
z4B)q~n=g+y<N>p{`Wqb`y@6>bmXi0cImp7QVq=<loGH*hsmdTTTz5;$)c$SLlKZNL
zYgAG-nkKz|=isJ6IMi4VSly^s7pFVwQc_Y#F}Dl7i7J<u6pHpGjh%q#ZTwm4(Nug&
z*tY?~dZw6s2?VNMo(ge-X6@(Cg#Z-R0nb*HoZ@6z%h?SSV2~(LujWp<xCKetM=tYz
zGJOkY_bIq=$Pm%?IlFXN@4=KGgm|niEXabOEdUsrS4{bVOf8;Z2j2}{{Z$o%cm6ol
z$4`G^$T{)*1>4(y8&r@Z>^a5Rx9;XUG9w-BKNeD$l`JNg{*<C1+}rRjoq8-q;h4G3
zM?052UM*k3lXK86)|bJUa}6C$M2}UXybzVAfWM_2_Fh+rEr2v&(c9LCoq-t<-MpSt
ztn?fklbJ^MBZDPq3AJBRm9_!(^tf9#*WI>r`vBUmnGpUTQ3t7lqH8NoWviDK^Hfd|
z<`uzbPff9MZij??0F;Xed8~lX^X<idvjBZhc(=7oc^V!9S0+tvrQ8@dYSbnOdRpa4
z7L~+#QwH1dvzV&d&7If1U|IUeboY4?|F6%x##u%M{mhb<F6=H)f48gL!apPq<j6(0
z(X3>E7r%dYrBzGhb5EPG5}=6-Aqm_wZ0!RSzS<LD8~q0y7G8V(+kSfYIJr<@lS0kn
zj|Iq=6iX2ZS^u%Ra8%I^20Q9xIWR1o@C)X4I>?T)&|?If!3A#O$$hlnFlhUL@2QRf
z9P{5Y2rCS*vi|zY`U7aZz<a95^YVgNhlbw;zSYM=k^daKeQxmT;QNw|!X>0&g&TkJ
zqRbL!iV&nFpK9OLko?DsX`$frupVirKpZj4WT%3)g|MYdciX?^JLY<M)@6m{WVL1&
zAl&yEB^#coCbQ_8$F;dR`R2K{v>4$`LBU%-YACK@{*SlZVgYX%aHeU;aE>WQ>YE#N
zoGaq7m{=N3`Q&u=_vf)n<p)i{TW&N0t~~jgHWpsVxJ*IoD?dYfo%c~Pu@xo~L<Ce1
zGx5l|WkEA9`^5?^n|ajZunOZgPj>p-Bj^buP{>rl=4RQSgvsfZS}fs&HTs^z!Phb4
z&bD~OXBpPG-1GF@QKJ^H&j#iF(d$6JVr9!L9RGI`3Sh#az}4|s`~MjIx5o>`M1B<p
zr}Hz7zssv^n&7y%Cfq>Ol|cGV>}220cyhAiErV2pa>#3Y-5+C)eFefHomK(oi}(DA
z4(@YoN4?4=vHdaHIQoFyzG7xG*aNNzzLO+4yvfHwm5Fsgi=>JVl<iViA7`ixW=ZDb
z)=~^#H~XzGeJA4DdkPX?D}OLt#&elHasP2iIeg&hYCk`+-T3nulp7}}$~jMt+->}Y
z%y7$}_Ud9@?uLD7O0>zPI1aoC50fI&m?CF4P7CmLQnrXt(mEt#cf2Vpu5;ePQp<<`
z1>SOsAl+TWX*Mir-pkXL-vH&ZW7Ev5<G%|S9J<wb%2VL`q}~_AF(?&2q02854T1HE
z1`e3jA;$RwuGZ1+e_m21)Jd=mWg5JZXZJ|X3tMgQ>7WV!Xx6y%4~xosl=LMii%-`E
z-xu=QRQWJjkq1t0x~~m(dcU~pUX<zhoY-8&-9wViijK05%w?FehlC@?J{mi7Sms{{
z@4c3|TMCB2Y`&k7WOi38`M3BCp#slA8o$1!2MhN-(>BoP7UgX2jyCE(aQK$y3R9Z!
z*_zVn<+kTA%6kq$_?<3$l$hlje9Rh57mUIx!fBT4Qu9>X-nQM^_|bJF*zE_ccUhBT
zln#cQ>6QesQ20VZ3lr$G3tYu&-bW~SZS5)w(bH<%7qVKi7#}_I-dsnRNca^byJ0)7
zEH(;;7D(4Stf{5eKF!BvimsZEK#sl=zn#gOIN4zbUk$7`JdmmD8v66~pw{<iyT__r
z*U(GCo!^R<QlZ&ToI+G+2ht=T6yn%aB>K4O)9f_+?Kvz%-DpJpDONa*I<cO-;!?fq
zJQ4e*)HCwa{(Dq~v1x+5olN?lnkzRr=+Ve8Q!`KOm<sn+d)(igpJVFVLpuDf_Q$x+
ztd#i}2-;hW3RB!#!#=2x(Yc5s$+(bKn<7N~;n=wI3$^U>ufr}rrElO5DE)q%1WI|j
zjwh~`Ps<gL7Uc4lsNSpCS^gFM%<RkK!h~DZ-0QV{FFOrtWzgx!x4?+THvZnfW$1X+
zq+e(Mog!f_mX=rQ{x=Ra!Oh4FUz-<dspj=hrTWckQj$xH<eQVvi`M?JEYa)F<fVQ%
z6idSPa(!`FysvK=3*21$-BD!fdGGlDCR}&p4wb`w{`~n60N8<zv8rCBh2#iw_HgBA
zZ&z1jny{N@u2Ou=kl&9E<co3=KIJLcwRc~gzqi#HA&s~TG(J;I`Q_)Y?3LLExZErT
zjEJ?78ko<XfJ%Oo!OC|YQ^;5LD$N1&BEUn`J@4R=SF7+dwC6A=;bKuuOAl(141b(k
zYj63M<5C5;DRAui_Jbl<s&FjNjSs5|Y3ygWKkg5vjjFAWyr6rKRBZpx-lGh1@_5|s
zgjtNg+M+Lf{c@9D+v&8*DQ@&GMUPFj>LiN3DSb+r=WFTEvg~zt&v(N(mD)@Hf8BFC
z4v?ot^FQs80Xm;-;9WPIDqzp%HrL|4HNhQlb0rF-93MtNP$BkSW8<QkvVu7<nx+|~
zE%ZCU_t_Jo-C5Y}IVan*ndf%7xUp47v@`F17?h~_&^%gNd!@fo&1)gSG#Db^lWpyh
zCSlKRd&@!@I+15!>onH>a>Tyx@t0Zgq2%~x_RN@=4w*tjh2Q1IpR~rS0##;Punq?`
zaGMt}0(9fvKkV%YOhlLy=^0L~`fNMsd?nG07)Q>|ir*%B9bJs&`2;!@>9{`}dXsAq
znIqWahEC3QCB*VKNsT3eLeeC0;Im!C>-(UW!uJ8>+qK%gk^lJ^<}V@cUS7GtgnI}`
z$Jj%z{+2+monwfZutY^gliJ#D&-{xe^ufjx1`d^6j5#H5Z3s%t+f=I@GfUsnahlaJ
z+xA>}K}`@qEo%0=)nD>No8DGB<gbjRSq9r@wR}Z_=8#!&?e@->y^XOCGDJP%u#Uag
z-i&*`E!DnuvMn}$KvXjN@ot-e_|;{!?%$4@_CtJ&qJ;zg4`pv1mDSpIk8VLiy1S7s
zk?t0xyAc8DZfTH|mhO^#Xej{!MY_8~y1S&${p@eQ`}6(%#u?+Bza9p#p0(DU*EO#>
z=WXG4h!v6Moc(Ys37qm))n~W_(4F(igEGB_eHUC{7Hw|PNEJk+Za+?^aq1xL&b9It
z$Yv%jcwKCT3CMJqL6+V`&0W|WpZCNC`I|z4V4Yja8cdi!UO()^(^`6gwE{U80~|q=
zFYESH7@#biOv&W}Ah^@?m+7Z?^d#H&paer{pZ$q><lLkkk^JVW^NgY-_p9&suoLxS
zi)VKHm{s1eAeTrM=%NxwgJr0cQ9IZZNE^x&7F#kf*gi;~W<=#CEKtY}*z3&w*pYRR
zZYh)fI5};-wyzkaTW`8zMq=cCeaGjU<(sp&m#@-VYu9P~GX}GFT9vDmUbBfrWGg_n
zy!=fmtLxs#OT??-J-1_`TNme*8+QVWq?LoU?#CPo39y<U$%zInUTrWb{U7*_8K%V~
z1h?F<Eep7{=Y_Y>(Rq1!At55Z*n86`1Foxt#tff2pP|wmE<S|D70LHTk)N-%?6%$<
z{}7_tdkK$eBqJw<qaUtbLNC@Xxsvu!n*2EClX#g<YkT7$1nb=9b&FSoQY4eeukRe2
z4z?0R0kEJNnbW%eFUSOK_5eh>w<M-?qpq0I+GGT9h#-ECfY~<v2>vDs-Fh)m)XC`n
zdn;;}`>SrhP$B&l)OUZ*btRboO3D)8k9p^p{j0(L3ebL4n`emluYRpEgV9aMl$_Q%
zW9?g@26<+4Pd?ibG<>6ihHo-yr_6sW^pfl_9$vl&wDb~)+WY|=Kijl#a7=wMMZv1^
zR5U>v8ZKZ$!-Wp+p67pTlW#MhQtTt)#!;<LHF~L-Pj$zLP=M&cq)lo@D#|BG11X>w
z`W5OkraGVh*jrn%!HX_GD-+}vhdzsP2HyP(OgFq1p_hEnwF-I(!Q0+~KSn7AOaS=L
zcx)q7=u`Y;l^P%cw?L8h2nS#LNeNuAeiSeOn)k_nUP==Tgv?1hgM_rN0J5oB1kE+{
zre8MLzZ?Xo|6slU-r&3YbBgvuf%ZgcY3R0(88Ws?{=9u?wLylK9`r>jxM}J$SypVA
zf3E=jD+|qEkgTDRERh?7@g_Y8@Ob;wK=XuoL(ySB^mp)o{C|}$_+^>fUfx6pc=XHz
zJR^M9OI=OIr#6{jQPNvB(#ig_iKwAXgjpfT-wtB@h!+WXkDz7o+N55Jfj1lgk2@r<
z?3DV)6+xoP;0EjJ+E5^Hs!9%%LmIrvPn9eX(u=M`Bl2r63XVTvhDj&_RHuhuK+D?B
zx@*$_ZKvrks+4c{!28bLp!MJU(bdPKpaX8B9X3WWq5Ef0ZtZUbVEFc%CGHs#7#+0W
zre+`F`v16s;^_x)1G=JiBJc(^AqL?oeBo6Q1ktBB;0>s<p(An`HZ|anS13yYEm6AI
zM9<|2Mv-|lw4GX#5|N(hfpl$X%3XWvkC_p0Bm<_pdk|Ah^b2TnqZD+C$OL6}+JBN)
z2IGJOJT3=zf<7(izkhFmkpOPMhdTvESSHuLYR7;|kU@vT9huVg-8F{@w=Q5&uAQ99
zQ@(YEw!lJC--kagK3;>CW*o~l6js3Fu-@&L9Vp7-xS<~|7s&Z#e*TgO2?=waTCYCO
zj?+C@XwrFlS#h8@VrZ@ipBf=?-?hHGg1gBFR>Y5FBA(FwE#&=Cz@mV~+e+3D>(#CZ
zrN*<(B5u~h1R-<QhwE>I@sp2*Pn!zG6L%+aJgjAtDb-T8@a9k}4O{Z~6XizwY!RRf
zQLX`*Wp<n2RkT$SoA{G{9_4ca_5ptPo8C9P?jZmsQaWM9u(U=Tj&Rg072s8!6d`GM
zZth&7ZXYskct!hX<Mcz@`1DG~Oy)Tl4H+=t)k3e13K>j7#sL{jd?73~zH0=ah|&iF
z?hRq6flqtYaxf+xit#OkPgc8&BtOOnc|?P9@O^W`c+MLG%bh}@yJ+Ms#qmNere`EY
zY`O{{e5DrLTOXyK@cXUZy4iVVzP`3021qheLI}S@JnV880ep$8ccs=Y`B9=-a7_3y
zFX-r>{Kn;U#;9f})A!4G*X=0Sq)XJ$k7V>D-oNp_IE9l<VuquU`}9(<L2q9OkR{x#
z-x>co^Ptm_*)Qf@2v`Kh2oV)#qR8g5#|ry%-gRMm-W>~bS<Ob1Ei}uePgUehSdOX0
zJV?+R+(e!qMQm|e_wn+7HK!{WPDzE#2`FFaM%Q}H)9TjPU|tY&hs4p!DMoA!<X9)O
z#ZCXhJXd>Bl67^ol$(_~H`XH1P$D19I5F+cxp;HSuRyROk<6A=Brjr~8*L!Sm-m`&
zzrwIZek}wwAMZfFM7cVy7UVX%jTWpeKDQx^8Z|{AJ5W8}9(}ggn{}o&*WgSH%{2Qo
z>Z!LXT6hm|K3SC}v;MkDVAT4w(B!4^?e{Yn($IOH$3jc&S1MPnfq~$Nu;wvb#(0x8
zydPw_;GakG{C8X~Cme4!8kS3poat_a{KS%?k<FC~fK{OK>hssVt#%UP>SWL7ak-K~
z_B^(OEzQtf*;cL3k>r<FHShV`Cg-ZD!4_m9W?P6tf_rzf<D52koIklh1LUQ~caQFp
zhHe*S`@X#M?!TFIc|5d@K7_4dZMivXJ(&6SX7B<VmqjCb&i(YQS>gLupR*9^eTsdo
z=Q2G5SwcQ#Ur}=j7~>R(E8jfGtxYbG@l{<trq-_9dtIE;D3Be<AxwD4#&I%m*{pO>
zorCJw`e-$-M(u52eVLwA7_H6b#HSZqYP*q}Z2FOcHz=BWlr<F*cn5T@OLHoE(-oZF
zqDP`zthHzU!_p`>jJs*Ic1joN^{N?#<2M@p+vD9a>MEJdQ=|fF`!m&n83<~E7x;kF
z-v~g1q*7>3vcWhzYVUD6YV&yFt%KmM1dZPx`M+*0-f!WdlFy{i@w(^2NQIA{P$O7A
z$ouUIxZtp)2t|b_G5-|d38m{WL!aj6eA~e*n?;4|S1D+a*<*hGZm!L`dG_pX$*(@V
z>UJq;<ND8mYXk<a&3k8C3rj2`bBT1qK2;E<=%&%-57#l_>$Hl!6jNkY_uC1T+})o|
zrj0o*V3L*LYjiWpMYyiExJQew*qs>w>iynfD&)3HQl7*Dc?VdA@Y1A#qQr=;(UIy0
zSzgo6Nzq*Na`hw=Uo`~y-F{Q}{mqoDc0lWA4I=^WLd9Y=Q?diDIiEu><3ZBmbh2Ik
z)KK%3cp<bhU}kg*_5==dRmAOSREA)LucmRSZ{3#(^B#KHSb9taVxBliv5T0=HIUOz
zxl=?S_i){RuTd*hl%VNLKtFdb!;LG4k|Xu15QpKYu+K8;9dK>+TQY+2p3^?Xn*oef
zo!8xy%-U6UGF=Q-6_>8NOneiRBWDWF1oFQ>csQ^rq|}84F9JS>PSsm*(su-n#cr)-
z$@KS?fUJfWZ;9Z3E9C%I0z=762v&*frHi$kfhT%S5MzsLl@^cF(QJ%tV&%IyI#ox?
zeZW<cMo!FMXf{EPN*B()-tNXYI&^tlBHbNnM7Z~`AU~LGO*Lbi7o|?HX!2fRvnuNN
z1x#jp8w{0%dD^y^dKk*Wh-{Xoq+SnoGIRW>EsoQI6%>eX7?xpt)8w5}aR$yreo!A%
z7I057x7b?QS(7IDbak?Z5Pce?p3XlMlcCP&lw`x#+u)~^=%kyj>*|19yGcA<k+NRW
zhT#=*-`%M2iIFS2`T6VkPjU@!T<(S?TwawEs}xfTp^IPCrr{5Xf@8x`&VJ~WYnHMm
zR_YHjD}poSH}5+d<>Og}kSCioWKZ2mj7Kv(9>PR4q-O3B`%cmR6D)x-H$xA0aPFpR
z?s{hM2@W`6?j|wPs<LoQq7L(fa*|)kXZ-jiA4?3%1;#7E+lA_uV_#weCY&rszdMlb
zHi=<~@{7Llc#n>RStU$!xot$V+{8PcmKn;IhV&Dpf4CQuF%^c5`Ae>uAgvAQgndf-
zs_ih7{`Hr4A#?U&9l9^=X$N(ivbF;FL_fTBu^LaYny8Qt4u@n<R`k_vVvznm?L@xG
zQ6=Sg_b|Kv)8BmgdW*JFO~mt(yzi4;tV@`cIOn`rC#tdC{gMgEin_d1xur9pO*0@f
z<XcchxenAe-tkJ}2&<|@cYPR4s0!3KXmfg2)M<4pEy5;K&OLN^?p(hgu!(Hc;MohK
z`^5-fT8ye4lj>x8!Q=W~rhrBFOBp}*5Uod2xkdMf?p8Zk+|-7E!U4UmPboJin-@$F
zgAy<aIO9)o8T(GTM5SUh>M9i!QLW*yntvwTg=6!O@wxBxez5!<L`Xsk<zCQSeM8xT
z^?I~ETu!vGB`n-CW7AW5T=-@r^U>%1W0uaAST<Hfje4;*Ltu^s4rBgKd)+*2xxn+R
z8{|4pGuuaEbv9`L8nL@ez5(X^GQ?Ne`OUJeX>-}hkFCmkRiA%9!zz{;<h3H#bm+*g
zu0oC~lDIFVL=w2?<Kj~Cc~>xN2zl`a1g(+Z*Z-@W8Sn@a3c8{n{~NXeZcOd!xe}R3
ze%EE<S4|=+pD6~yTD4vRL^w%41wx_7*fb=g3jpZ_R{fOsa<x;UQ&%7PrlF^<uGNG=
zSBdWywj`Fum><tgLENWaCO(`cs7Ymq_egMR8N2-Sfs1Mus7eU8Nn?FteY-j@Tbf+d
zuaZ6kmiV|6ZZ*P_zyNiL{3WWE=><$(S?0VS6b@M;Ub2?s=5L=M8ELiY@+GBoDR5KR
zkkLC%e6B=Sda_fS%=RF4AL>&sQG}}2K4d8M?4dLdQ7G|6M@=l&tn;rItt?rq0YWHt
zqBD!CV;b0jF=Pu{obo;F3Tac;V`CKi=Yz<cS&5LzcRxNyeXPNE&g8xS`VXn`b8W0y
z3F^UC*wFZxANwQNRGrQ<rk=wLzPvn~=|knJJzH5j$bDz-7G00h?+RFJ&g@%)+kl_>
zl8Q$zsr<WKK!><zgCoRAM)1Nr@``+GuD5_mnwttzAu-<M?fXX1%go8|STMhz%7D~a
z!jm_ox=La^D)#-9?Sx=#m4pi;6$W7IK>Ki5yZS>ReERL;%PW3B*NBVT`y*Hd!8Zoz
z#n3;3Z_tYvG{J4#*6nt)n=Kh=pth^>@Ps5PsL3AC{7Wpdf=Kv)zhM*=*3sCtGf6M}
ztVz+Be4Pxh8WUjX<vU)UI235iqdN`OvKh7>*Z6c_JG+qQ<>|U@cbvksqphOV%>%5U
zjA|<58kbGOg<zd*%$<_4_Tm?Z!iz*LQZ>wiAjBVM=8B^vG-0YGhQ!h}!IDDHjohz>
zL?&blE-$xvg7!9n&R>Y29quLe#mz3KO;=2)zg!|e3s2OD`$yD>DhcYKO)I5X5OM*o
z+|YzBl(ZT$+S4xA`*MP1B0mM>R3u6YGYUxB;!JphDk`2lh+}A8#i$cv&c(`+KuXs)
z$fitZTMk(ZrI(a+qEg_pzRd9ETS{G<UQloL#l~~gwK4-bvxPnI%~mhdoXO7HD?;Un
z@p@z12!8wiilnO36A@o6X&d^wgD9!3H*Ke`>N9-aZkGg+u(0wQx%=#TFY=ZBm2P4m
zzsdmu)na6@fkg(O|9uqVEEmNor`<o_nwSs-dCfrf?B99Nzg-q^BneGtV3=Z}pm`h>
zPLY%dGZnD?{5X!PVr;5D9&F2qh3vW(qL8BKafVN|g?GW=-U`F`VIbPG1m-cUM`9dN
z4Hgyg)%Cf?%eQpM{N0d(#@;7Zc9gQi(vw1Z05C67vNABnR0!d>tkIa;qrz3Gb?-Ji
zCFBUE?(Mok%h9f0bBN+qYd_@CFd6>Y$aOueH#BOm`!f{q$>7pxcash@U8gWK@U#jn
zb`i4f<qicQiYXE~+1*-EOkhUl<(pyyZWB@Z`j#B=G;@(_a}0#&t(;}E)wX|6qr0pz
z<t_@$GX=7Cs(mvSo$e!_lU2gO6yA_5VGqf8m?YH?LC@M=&s=D<<GA+6vPO0h{!(vH
z;Yg|}d~el`ra-5hT=lxVPk#@?Nl7HOJx!Y{al4p^o@Ayb5T;P^7ySTmTX#AToT&G@
z-Q?oqa-tAP^o&XiS_by{c~)D8A>lY&TY?k{gi)&BLVr8jc4q2v7dvyusY2-0lyD8S
zn!HUrAs#tgp&cB8?RE}FcC=Rru1KGBqY|d;qpWMb+vXF!4+2C)1L8cAD~U8kGLA^@
zN&|-Wp}a?z<lbjb{@6%^K7!2l#Y$WRHS`A=81iS8_rT}HdhZp{fk=0(TPS_gg2$P2
zX1#NflImKLtxnFMUR|oxqzug|Yle{2I9^#E0lNu&v$5nUA5Z0Q-ZR&&kwGXJ7d;aA
zP3)%FV31#b{GbNQ6XckVoa$<!5cSH}$hay~8So4%flZzS+dXsc+ZGko=CUs+8Ie@<
z!)C-G^h=8d1$pA@JKaETr1OnMzadaH|IUzzu40r(mw*jv)Y(Vfe8_ik@hdSS{ByK*
zv|U&?b=O&p9R$J<E-EUtD5Oa0GQftg9^umx87X&xCavTvgkf$rW000yc=EAVkE)*n
zZ&<5=R%j`Z-&!WOLzMA=f+qOINHs2|Lvaj3BcIWF>|1I)E+d5MG*K>ywxBz0(fN=;
zBZG1c+64LjY>ChjlWIw3^X0mQ#DvhfY8~>4GfI!HOX>aZ6^Y{Z!>uyDuMDyl#!7yq
zRNcm<MD{n%=0hIeeZ{}$mfD+i&iA*#76n^xp|wx*UhqE8(s8~so=JL3y}uyHyP+9T
zHD7h~)k@frdFKAdbujN005+)=NBIcbLQyUOXkEa;4;(w5wWCGRyr7Hho3wTcukA%_
ze!B@U;j*I#x{Uk|Uw5#~{n5`etRP^jZ*HiseF;8m(<v*#q22g+So98$JL7b+Y0P}m
zUAdaXzN8DHCP*%ioMae2#jYVeji8`GXjPcYM6<g}d4p<9QyXyg+3~NZN;mSUIuvOf
zWD9L_*}}dkY6twzS)~SnH>TSe#B_H0H^(b2x@!nYfdty*>(LCK2$QZP&+E2>Za_X=
zA%d7xY!u?Ut)~C;R&oC#UK*W`RHsepm-Er3NCKw^k_1chaqG{8+sNTezO5&N9L8PY
zQW&b)8oEayTi2Mg9H7`69j46lcy)dze<B{v0-vU>4OlAXADgM1|D*>2nu?J*4KZ$K
z5d**oNc_WaK&}yDk-epq{j?FTy|C4a0E)ssp~TPxYgoIie8qpkurL1JlKu!RUL{pU
zq|Ek1F*6L9M`(>)@xN;1qF(`ugUrL<c!B_+q$^k>>C;lc^*M#_*#l^P*gv?BAOLXx
zoq;MbVAd5kY)ySN1~AxH>y`8x#O7ni_-1gADnam6%=Sv}Uj3P55rJQpnU&_9Oa6_^
z#mF0l!rT*Jj(+{VKq}$*F#PDlUFsOzIDDrvEd^hB)X2^GK8#FAIa0LyKU{#DD{rY-
z^R>)T^~{d!JgHmCqx;dLTgr$pzmfs+^=t4`$sa4k_c8EJHRq&p*8$Kcbw&d$xhQgW
zZP!l<VuSPij~Kt8bxZ~mY30zN#gbPCo+0je*xLndLn^bf8WI`ZP-T<|4dHyF>n-J3
zryOgBz-RuL2kFK$yc~d)jG9a;)TK{<C%=5K_xV($rw7rlgV!k4;OY5H!iZPIuC}E-
zMbhTIXFbsB7v}EX2gPt8(=%mWP4)I-lcXdiyX0tQ1~K6Lu_ZAbr*&|!DhJkKL*=|b
zz$18Webw+4iI1fF@rQDuVG*4v3gVC5g46Lbnmvo<3R{=GqrC}df3`>O;HJy-kDn80
zZ9O45mlR;13TQeG+3;8vye2>Y3)EE+YrnnvqtogEl$XhJ2stQ!9h7-+P*Q>`MPoGm
zvj7#BRV9sx+a4RSd*!~_vM*(Y)4a0~gU1&tLlmE5TDkfRpWpXCaFBf|t{W~yvuL-D
zq+E3dDin)jD9K=!lC5d@X2KzDHP!@+{3{I04LFiffxXcK4Z}Z;b1G<YfX2EFshN1X
z7^ICsHBF}XR#poTQfYR@b7I-nwL(>>=8MqCCu=q$W*r%fkbef>e}L4tgHS~J>`*GR
z5$Hbny?_#pSg2OSboiR-V7@WRj*(Eg;tMIC`+*dBg5>VXTnKz5fpo80ejcdm#TJ!f
zUq@!i-@4lt%{M$0;tyw!maO@yHp;qzBDpu`yq{p;!LP&DowA8EKq+^-$_K5MR~WQP
zg8Z4f;czK|!iehT{DgWimHX4>;esqUq$wYMP*?4maZ;mp*4)W{Uj21MNUgWnUu_~j
zT>~Mg{~{@u`}rW1RQ-vksQxpTj3tXB5L#>nL3NH>reLwlb)WYzU<(wR7uCBQ4cRiP
z<sw~NkPfN<)#UGyjfgFV@nM}}6MUs;hjHZQJuSBJuST#XQUY2@6nZ>mxrq^*FA*fw
zxNOHiNquDSm4SWrd}_A<`L|F(pN6{iRo?p(Y=Wr|0EIc-T;{P{%vx&mlGB=~b6oA%
z=qVv-d*~Wf2LgEZUSv!iZQ>wu_)Vc#2Rl_K<G9n6NBo}eMap#rvqPdUirB&{L*=X?
z@646_CQ6i&+3q?QLZY(G;`-+{BDN>{{d81~%5>_(mP#7MBdOXIvEBR(n`Ps>Q9gI0
zADz7!%HC0WI7<R3Hb*Di*1NuKqi=4K8+ERGkRdbHxhkvFa6#|yULJQnVw<b8e)Keo
zaq5G6Gzvsq2}6KSG4$5=;t)Z-&^S>P(@pyFD3|8Ue6;6v&HG`BAD*sO(u4DjBUuM}
z#TLCUz25-&lV}7?;rOgt_fGP%uhyP(-trkf%6X{N_lb|S4wve(Ah~xv+LJgdRN8D5
z&tJJ!8$CqFYn|~^+k1AXlnt<#aT!-UT><yPS+%b5ybA$TXQ`mf>MHB$ul3>juI!Rg
zXorhYs~r*2LK7M9hkyPSw7$kx3CgV7PZ&`9W1`_fCmNhxxI6<Q*u7M?l!JhqALaRU
zYCLlY#jiUJ*&U$h)lQfw(f_bnu^?|bR$$Jq+93Oy2FtBVG$7IvKy00z+RFj5Sv1ti
zb}ie#vqQHdu~@^*0;}{uu3cwy)HR&KZ}&X0LOGS+>(Z1DUT1o$t~5^MNL%w)%ilPO
zzvEhG(T#$hN@mC)%IdRb?lM26B>7mUqor1wWnUq_FNpsm6iy^4)u_ajdlh-LJNpk}
zRb2(uNY=TOi2=FrukC!3Q1sI+XHA=R9ElzhL{R`%X;X3Ajt6Xn{vSZqj~j)PTRv+y
zh+#SN1>wtZtmm?l;}dhG#kF$XT`VY}M^HTGI<(0vR&buD+@vA}|2gBB*FmCsAU#JX
z@@h?k7Xnm^$?@|(R@bC(kn0<Qcx8c!t8SGUF>-f(6iKneiRyq!?>8M6jo0GCPb4JP
zPsKlKv1s5sdz@{e%89pB{S=_kDxECrmv^iHsl|}CgZ?hJm11UQns;J@?9(iM&lJ4j
z%-2i}r0QNg$<uHiX?${;X)l}*k@=3h2?bpD5s}ryg!82&B6rn{e>8sbkurl}_u5=%
z-#uJNOZ9Md+;n*(a630q7$CI4q8)VQc2%$N?FdMXkrK>2Z#S3M7Y9=z6An3P(Wuu0
zMwtel7h2|9*u%CH34h`F6i&rc7`0RQ{k5V`CzlmRRA(c1vEMfm_K}0$Z^bwyLEFSA
zRNbn||F14Wi`*F=&8sOFn~q_$y|c0nB}uT!eo=d=^haQ%4drLyePwt_14h_y6hNNA
z5QD!C7%fpSsd&_^&oc;XKLkMwJi|neCm$YMu|_B&`?Myi<=R6$$uswZQz;@IlP4lx
zi{YJ|CS%&VE%y|&i38nn%BA;Q#yR*j%3lft!`%Rx_dL$gpqi)BP~||D2BzRaOupkI
zaZK>*hG-U@di9Do93#<tYQ~tho8`}=Xump8T9v!*{0{_`mtQg7**2tn-8TsYyXSjL
zALE|OnPF(^O_i(WS^teff#c=xkZ?8Rt+i;h(AgkXN1|+o-+mQ$qNMtvaq_GsXbWkD
zRZBDKmYe$-#($e@zWSvu3wRF2>esSCJxqvl3JVr&f%4X+R%b09_qtL&EbA6u6c7|u
z4AHfsW95@xnuh5EB-aBOrw~whEI~R>avh?smW+q`0kIiWUEuoRs4%z8mLM!Z(#C&t
zx`E79rjo_)ahXfZXoc}91ImtZ!od8kJV~?9_m;qL9Pn$xm&(37l>#9ikkgaphZs~2
z9WZpWHTJxIX6#3!WGLv>w}pNSln^rflJocZX__Pp%@HB!+X{PEWS;@I1YwS2Enx=0
z*VXbJ4wke2Qe;65B^MN9Y9gG36_XW#B#NnwsIEJx&wkg1fQ4uL5L)ma##Y6felDLd
zLoxmHOi=FevuaRg!BOmh(w|F?JaoyikCq=46@?aIjR1LOC*sh{bw{RCE?JGL+>FAg
zNa~d(rW<;~`_VB#a9Lj;OAH>3veqgFkO(tBMtL&5qpH>9kEj4@tY;sdetM%o3L}T?
zjpjIEJ>r0e15Tw}vEcp}paDp3w`CrI#e~Dfm=w`E&ktZpwJn-oqG(t<cWg26%raqE
zU6z(==tX!k>+o>L_ZukqWC2~m&N=LKb510J@W(VL*I&dseVowq@nrM0W8Od4|CHr%
zy=q9pJ_RM$NFcvilDOk-xw^k~pOBP-tydiloS|lES8lfaBFiPkzimw;<aaAXE>{^q
zP3LgKo=1<Fh>wNeQWbc71Cp5>M1jfV#3V?qnv6V3KDU;dD)-lfnO|QiOy|1ise6bp
zqpr_;scE(kt2;WslZiq2`P_5C$$Dd}sLkpwx=0TwUy#e9(iJHRJbp0yA!*kK5Iv{_
zc+&sSk(?Bm0Ovz(cyJVsIMbZP>Y_d~1AHAnKMY50T_5zg=L&XCLd7aY<i%1veMM|-
z*sw>P*SbxX=$&DhKl7`8rePdB+c|)&zf>J$Z@>LUam;dz00L$QwRf_A2F!TSfZ4Pt
z?-B}RWZv_D{%qDKLT>(tGZ_-b08IOvQ^hU*Rc_`}8oZz&;I7nrEz$j3tsF^8Ck&Tf
zKSRXp{Fks)1et=ARsj!Ucw}nL(>eP5hAC#lpu4TgoU&QxQ}b9sKHtH?i;01cRRUh8
zyca4erlsl$<hzlSaeLMiSt6vWDjEPZ6=M-_G2RKg7F!UrqWn;nIzDaJz@{O@(<3yY
z%3;S3x)qgY!Uvr7hMFnj9KK0kog#ZrgbI63onMFz6p%rg3~y$L^Mfw6N>aldBwM)i
z@rcnS30YIP60MP-(mU%1u_iG@Djz;h*5NE+cEp+|S(ywTeG>se26HZiyh3;?UKoMf
z1M}bN4M3WFh0qPhP$+X?@Wqj*Q|${9EY}K#ZD^S2y$4W%<{@E{@_7uY_EqF<y?{#8
zqSOr}z!K|Dbu$ELPLLX&3{CnSVrnyau*jL~^eBc2PB;eTNIlQwYgc@{9}ObyC<8MS
zMn%NIj^o`;YV(0H1qAI1jX*d$oWX-kKWMRwt6w9$F<Z9r(&XfK`7pB6Tjq0UN4l82
z)BUp};h`OAl>9ClIxh(kE5XSWG?I9`C6g7)LFvzOgtfJU7o!J>E&3J;a15l7Q1_-I
z;GPB{J=cazSoRted=UnEZ8#|ojnIJ#Gt&5Wv}Smn1es?sXqjFk^E{QkuP05;)IK1c
z4kT0wIK5?)evun1@|09nX1<i@y-cuRuGQi7dv~q6F*Id=tF&C#k5V|ZOQd=h_2C41
z`Rh|lr0Kop12{>qO?(iP%Vr#Vtd9}FJy0<O)iXIIQz)H~KKaYwhcE+0e@k8}4Aq81
z)EjxA1A45gruCro(UF6LL(f;CVwrhwVVNAZATE146=j|J%&MF6RB4Zqh$K0xG}*EC
z*YPtB?Rp=DTN@mi8I6)R5hNN1b)Xmk2{9nmaJEB|ZYKm>KU^MO*03xITY(AMGC<Ut
z7ME-~bkUHoi<t}k6-4@#!hp|cwPU?(%3-sEooHlSNCs6Ps`L9BgRhu~Lp4c<WuV+r
zW<~5~H6Ap~lgC4MeT|hr1LUF%ZW+7*4#xL>6BWY>0+GQ0AM7hhuu|LWE!!owg7sR?
zv#S%u(iPak2ya1%Le!iQlh;&5W!>N3Bgn$#@fD;o(hzEP5FFNhkOcZg76F<w9GL&@
zUpOeeBOskh4GA#q4JP<-R)B$<vUska%yX29U<tLS@lf%nY*P8#Fep00%cJg}-}I0A
z)50YdWKM@>b4BUM?iU@R{N)WX<kRR9lA|Ei`!M14(^aNa>hoVc3@9=@6O|IRB*2Y1
zToYeu-V{-6Yx6>*@2$AvtmO3gM_&oP1B_v@30z6a^;vV8^SKgTdki%NQiy5`EsOeE
z#3pC4yH@nOpP~q(trF5|5Kc=4B!Odz^JOp+8YvN$P#NGp6_Lk&#85-KAa=fIH!(u^
z5j<31_@2@f%jA^aP`8|F^n9nitBAyjp%J1H`ij6J%il#6Q8GW)IA!&N%LMB|`q`V@
zUPL1AcfCpD;Lr)e5TEb2?C2<*?!ThzAfthJs3n8lILSiuJmay#LMY=QcM{m}CZC>R
zzSqfQaNu*l36c$TUGaHp-lMYL<`_gu7T)r`xmz*Yd<KD~piwqTSN5S1^AZ-8${A>s
z)vGK{C{#Yl(7g(x;>~M(<$&q-zOCe9`wGr_HZM=HPTlx`!+TnJ+Oc!$o;l^jXQsiL
z*!?tk!X|&rGfEmH_<VDmWy2CIBG!a|ud(m4{<l9dszjmi1#~qQgK~oSL#Y7ZMD%gG
z^34ei$ovowRO7S#ejmRnh!r;a0Y_PcDjAY%;j2H%2}9Mh5U{n_e2-3};o4uT>z2D1
z;qIgCx<s+_0>csh3EU485;5F`8o(`e4CWj=Pko7?D=l;M%8I_}J<)W<_;R2b5jg50
zFNNJC_-kbBdogZdl|^NtTXa-Zsj%$y)l;d-e6PGQ|9GZZ$g;#*Xzmo{?mDFucmAHd
zqu7?AWM+X(VxMw6OD!Cw#?4BgmlYd_(_n)=q0C?iZxl8U>q-VFcm<f2p1i+4LO!%W
zRv{Tf9QJHAeW>@X{QpFa%CW}<+^FyL!eWTroL-_G)9(^JBM<X|QnwsB2}%W+_Wppe
zzM})q)**sul9o3I2vwION36&I_r<E7Q6F2%Whq?htXMMjG^7i8#WQx{`O`9gKpJA2
zd7hg0FOLX_EE2&l<rWE3le7LtamWBH_5Y8n)H4@Mx?3-$=6d9dRG^vCX&;0te_R7G
z3B}Z1<hItg(jFotihC}T(nFMNdEty_WPYJXkNh9hfdb0u!mKhiN~U*1lFfX<bbody
z*m8SbhtYzv2PKE`U$F?*pef8;$B>FK;SX~N5@m8D<FiQt^3GO8I{mg`jW;Hec5A?x
zX%9{oL&~?U?}wxbG}4N)MQLy;MBA$+N9~roO^UWeYBk)XCE7bqnEc%%WKcJUTGo<W
zgHj*6p{79}P02Ur3zO=P`&2Zt&hGj?_<HwSx($;MKVHf81eD<q%E%>N#|^D6|ARCs
zS1p>>nM+JuTsW*9Hdwonp-nSgY|)f${T0Q}dhZ%Ng38Sd8zqWYLKHe(sh=l!jjR&P
zIN1oxkrGWTOh58jHnH^GHj%f*GV7ts?dh^#Nr;3HqfSNM?4V}*%eE?grgm25+I8JO
zQW3-e(Wf>lGaK?ck#qRzlD%uDFUl?s+q5j^F!7tN_xIiTo+N&0$lX&?8i^_@vd}ND
zaoL_i()G3yO<7Llvn1<F%_FdFei=AM)Aw2ea!^oxn>M=RHUIODS+7Y9AXW!vJ=V`a
zy@^aH^6<_%-t(MT9KcI?BIt(&b^UmwK&@)gwaSG<?TG>?S<g2H66ee8AV!q)!1#g7
za*<N<iBUzBGYGbIYn{P))cv8RNXy<VPvwIllO>)k(lyzpppkvGpT7D1aPKuKbOk5k
zxVF1d3u?ziLau0!WTZA)YzA-t0lR4A#Cp~7XOGx9jd+}Ub^B8I<bvAT0j(BD6HXoM
zr1maJJ_EH^s5RQn8P=4u!}~|OG@$q&)Tw5WKk9N{tBG^INUtko0xY3EDQ8QtKrJvd
znw)t~i1m8d{_@JYPu6!?-8LecsX>OCYQ-@+is-bsj{qlxO6AcPEdqILp!zl{x#980
ziWMgQ)J9>(^mS79H^z#BtL&wblX>FpffmvPS|jcoYAw2OG8Wp4S^X@smR7<Q0l~==
zc-v;d%%p5mS+q8fktlPz$!7PEYoV{xlnn@VI!V#19WvkBmOQwAH4pF&#7A@xgK0-~
zM}BAJ9JKg3^r{4G{voL?cUN$Hnk)){RrW=GZ?=Tg1C>b3akZ-e;G^S*w(0ahNV2{@
z7YsKGj549zWEYTh^7~zK0wqg`e)9zspBian%Nj3ZSlj-Um6;%HVg^#c?@ml_cO`Xs
zy%@oAmezE2k(<%>@M=x^_sAz$FTS`^qcd($tbhHXsy}TFzsYrtXi@#K)Zz+3`jN26
zbQc;)?OFcZHJ$MK<p8Y?{1;{?tDB&O`AX)<1t4}(M_l<E$S^(g-7&9FcjQ2DP?^R%
zP=HRpr1m+<7M03wFNBMkpW-DGlp)5L#^!mx{Z4i!wfXRaR5<Zh6HpwLFS;mA8U=E`
zk~~kPz35CGG(dPWboIYaygXRwBKe0wmhuy%SQdB3KS-`M%G7nB3D#%IY+(<1wS-t9
zpH+Geo4u*Yebz5wO$)VVa0(!OdeH_&m;rT&K{4a&-2jQzu)`~02P6)x)N=EzmQQ>o
zqs0ylG^dkPRdnnYTYfWJto1wy)k?b7lk!>OaRf!~6%1)R)9CvFJ<7~k9=;o0qP%!5
z@jJFIBAS%t7O#GDcwnMqzg9{5ZW4%vCOw3t+BSNXV#tMaUgDNI1Q|FiI79<*ZFnEk
zmiOHNP7`E(`hOU0CyydEicm#TEzo4epN$Xo2YOw>WIjIMy$v!4^c@jD`~|PeHF{p#
zg*Go~AN2YEi=C&uOz1Fl?u@rP(;4ry#e!i`0#!}HPxg-jw2j^)T;2O{Po4F31z8J&
zDTI^Le)ApC!J|gst~9_KCf%2#t)}hUMT7xR(;0=IJx!P@5oGXgEZO}JN%Hr;XESM4
zd#avf8?W^OZtO~A+#g$PW-=js0?KQ5u$EVKjkO!6>bvTU{$`{H;?r3q=DDHE)|l^Y
zBchA+OJtoH2QP}~EW17pWvI?Kw-eo5-BZh^zd)e*n6A6%^`Vf&N=Z?wTXhBH^8Dr}
zvaYKgFRr|yf>g&rPH3f_^8Gg-*hka#10kZuq9?e6+7Xa-3+WxKt^8ZI%JD4gvxJ0Z
zsR|l%)jREvd{OQLt%d7vJXu+IcNDb99!v7z=~NFnH(qOW=ani&t9(~M=gRJ;-7=N4
z{0_pDJM^>{@*=<^E+&+||MNO*2ac+-IpDG&Tpd#NV9DfziK*uQ_;e1$n9}!UhK&iG
z^6<F#oOUy!ki{H^Nz0i?KwaA?ChKVdSBxD$Y0)A3=5rtNt(i|PkqAwFyqZ!$y?yJU
zMSyXxC^Qc5mKWFl@(U`I{CDrV`5d4+h95JPm_uGEJo|Mnbw2_!tfvSUyRtuN<>`8=
z0Bg0X%zAH5<8`Jxr&BD?-#oQptync?O3Gax+XB@Gk`e%?^~EyO#1X!Mii_sHxzUs_
zdqpaei^wh+WcEtEjs~1fnzkQ4yi!%EYSJKVUT~H0wlV?kg`BP#<a~`^)&8MwL2jgN
z@f>E*9sDUI!l_kLtE2VXwhGM9H28356b*p2ij8a>GokB&kUzfw*EJ(UVh?}8rR~W=
zYOT|}n1&yVQ88UlLRGrRc=qQC3iP64zM1Kg;Z>BgQq9r|tgjoz{};KAQwNG)9(mvV
z#;hR&%0}cu4!Rf>_pPZ6Gk&kW-*1QI;_`bv+Axe7T<@(sb0r;kDg*B1_fNn{GsSA?
z_VKrwm2W?ZT9b(wzmTKPu<w!H=NhtC=zjIXN$=Z=wI{vg8GzkWywc$>6SRcz&7WZ1
z7RT8_9Sn8iyVEkVY4=apnQ*r6uJ#UJLBTB54W<&Di)-fZ2r+5zQaDt{wOCSzclIQ@
zZ`3x+W{V3<P+b+Bj(tf4ykkYOYmBLjzF5P4xDLT;jEy6JCh>qS_37~r*X5*=Bj{c}
zlQt8Ad5t}(eoFN6#jAmob>(Va>9P+hW!lvq_LPj&ULXkGCyl-ObB={W=NPfU*>eEE
z_`};);#p`Z58K(4bGTfeVCp(~Qe2dNn~k*!FBZI8vR9IjCE`J?V||&1wxnP2bb;f;
z_WXNCDi#rrAlj+c;&-}O(F<}obTk_>DhYU*yGr&V5B*pAKHdpQnI}fQZoj8w=iE&&
z6MIiD3~x`O%X*Qj{v!F-!cR$cw3FU^z9nK!Z#c&YnR<gq^yM=x7t4zvUj>c0W%tbo
zG$M6ZIt9Uzkkf+FysXI-R$)PzhOF$*eSCArm6VV2gVv>3@%1kO2fa@*q&4~&sTqha
zj}6x;w>XUuJ~^x`Sf1|lBm_Ts3bc{FpG(%Hv8enhuwGEGaTn9GBhuUji)f_PCcJbd
zidF>tJbysl6T3#nr)Ht4ayW@oZ&rPV>&O+ac8H;X`%5H?B;i~FVjMu#nIPz}VJ<Ns
zc1&25v$`d-5%_+G#|9;l)iAZS*n$d(>a?9G8PQBSMA>9>o@*lDMm>EJv@JYgFm82W
z>Foa^pw^!vP1uN2?u7z|txiLD(m?v_nxqOIhW!DZ21k~c6Dv&WWjM(EkqXvYBi{(s
zELP?Ym^&~W5nqP|SX%?KAA@S4S?X82D-}hZE@2z>(O}@{Mgv8L|8E@Bhyi;J7`6e2
z$M!r_*gTZc)E4#UpK14zihXP~^;+~^D@m%gdY^m{E>_p`sU<i(m~3p}7idiAIgovj
z-EC3+^mc|$ZQ+zY71&=ntBFre_)64UNtldITK|<uuNE+@x5!waov?W$l>OT7BNj~|
zrH|*xn1UC|7Kuvm?{4}Yn;r|qB@afV$pmH2M6CI-YX0eDpk=DKWM^G$b(N5+*?3l~
zaFd{o=zPRF#LP#T$?ou``FyuXwbuG+bBl~K=~N}1bgvk^@nb#x_3V!pgd%>n456qt
zJ%xD}icqA^--PA-ZmS%@N0aM(a@PWQ|F7Fg`tENY>A|QO(pus8?0ZMgiwA~5z9j(W
zMc^}G7AVFhMla|p>|j5(*BygFCc1`RYs?-(^q%@^V#^@ZMd0{KHO=cA-J`DD$4*xa
zw=<>pC$6!<fu#q9+ajI$0rL<iPwScw5<4>o7a2`@G}3yEpBl+L>{;ccUQTf4cbLha
zri;zgsg1hE`ph+Dghzi?P2F?ewUIgda=K=j(h8Ed+1`<9%r{2U6^^!$BC6p&tqNf8
zT5Zy8Wb{NOncD{*GG7eGH(_6<s)JBkJIH&7?-z-@HRRB=vl&>#XMkauDl;v4!=#EH
z6>PA{tW#nT&mU4NbPtJ4Clr03ps6AwV*vH9%%?Y|;Ht9wym`KUB4K4?$bgvd#5h?w
zowLI1uUv$#;{1bax)?~j0jEh(c!^9l=c(d-=<@((_C&0R;_{IxTX05`K1DeIL%utQ
zkVp`%PyIC43DeM0@z+&DsAMJOOJiw*uzxikB}odssdn$y%xXB<2Au2k_d(q9&t9xL
z4*!P>(2Q_(q=HVv#~#}bMT|2P%by8=%1j28knJ~e!$2IJeT1>x3s{COWV35E|8Q`!
zj5J(DZWtn~+JZPZ%EFgi#9R53?}7Yb!&PkM*^jS2XZBXWN|MrwRp%7vjzdOi0QZx3
z;a5|228gFo15nA~L1Y9vub%8cfQ^K$Z`fa)H2YQ4t@nc0Iv7@i8LdOoBIoUB;v|;c
zHwmWuPsThQFE3!lvFaGv4H3mAzDO{>91?_=WP?|orrdN$+SO#}{X#bCMS!stdnh9v
zGeFtS`f{hg8L%z>lX|}0fX4Juzt@!T-jQmVp?=|r_1GdWhPl|1M!<3lv)I$|rIDWL
zRCYZ~!y%9_!MdxI`Q0m!j@o`WGKiaz7am~I>U`y1Ix$y4iPZc=qh6)!d85U`171jz
zn?c_|(KkybN6%8wsLdsz2E`I5y(BxKh{BO+ZLS4PZCCkrdq^X1n+@Jsz0vd)L><S6
znn@7e@V5gU!8FEwNEi{$?`f)iVc*{}pmw>{<bt4q@H*c|{Jj_n1+GSfsKh%0Rbom>
z4BKFSGeCND^rWTQfsZD@r*E&2=^Y~({Va7%@zU-foQ9bk{^}5~Sk6YKy#iRgGS1PH
zMg3zC_{pV?5vM@R7rR(=rLu073|JJ5=1HrmnD3xs>S=>T2vkl7tUEv)!f#1sfQbdS
zRshMql%EWhKy3hM;cifBjhzXU-4K-<O|(>Fw}Hi<Pt||{8o^rv+YN|hK9f}_*6w^~
zNO}>QaO-AU6q)K<xO7Nr(c86%bM#1Eg|QN5A>cz)QGyNP{+;Fyc)ESr1cu>@c?$QT
zY(6Px^~GlO-RZFQadiE_Vs7*!v4xShok>1chI#4hX3YCQY0*CVi2%^k)tqoGXNJnt
zd+(>*d8hK|E-+0LtTYN=v1Dj7r>$N#KyZtnH8;CI<Z3d~;O1doJf(Cw$s7e?_!$Qc
zxFyT*XwmGKM{(c>zd`iA_&SV(1Xu&Xa<^1Tz(xv_Rn%JDidC3|6^nT{cz0hDj~Uhx
zhn7t2wpAKzaGLt$Q(K`vWUw@g6^g@%?@}7znYT5>&FR*X1udsfSE39^JjQR`>QlBK
z-Z~@}0f`xaJpLYp6tWmqTt?G6lX*-zRy~wa=f5+ff+|W`X$nUV@bNWl8+S=iudZsw
znfGZ)ZTeO0aKF^$;{}J*RF!5opwq-;2NaS!7WBtF<cqlp?9@Z(ilk-^9=)n_Z%g^w
z(J(z}UJ87%2z2;KUqZDuhnp_yWzU?ifKNAC$sniN-%c$NS!Jy;6Z4?@W%TiQ`uycY
zO@Cq&imO3-4E4VJV813>^*?K#QFPU<-s=y4m<Zxq`FdPOuDiSffI;>P;9WV~>)J*k
zG?HR`x=^s7;?li|3h|wm#X}K(v&gqvEO+z($<pBiwL(VqLR-il{cfA(#Rdj$C@s2Q
zF~7xqlY3qVU~ovgEAdyEheB|<1IGDO=20+>bY-n&+R?ZgfYsm6oxB~5aX3j0$}u*L
z-P-jEt6ySfVqKhSXAPw~-8M!IT3ntp%_bbz#jjvv4WU3gGp@=u*L%q-0JJj=7&Q1I
zs07*oTywXPT0_ErV!b49FLo{>i~+b@UW}6*^f^d;gh0qB#^Og0>U`+XD`=(_@0a-*
z>wW`bKyf9O;J8Lq9j~mF_|$~z#hh{_^H;DHBr9+=94mW;^hjnrDOU+@B}=~)&Nnb2
zv#{|`n2i0pB51`>Et4*VvkZmi(R>x+?K-&j<qvqqLN8#qSpEju^B#vXcwfPKHaVj<
zJ2B=>;P6PcS+*Yn2i)BZu9aeC&czhKLM;{xn^-R=)=baHesgTKMPBj~l(kPi#MLvW
za$QT}%5~3)H={$CKYQOm+z;Q0g&vV#5?*X57T<#+lLIKlzjZ8)0;{>|*_>Mj?SpgD
zen>WTCox0SSNryTsq<fl21F35yFQ4z6{nt<C)?qLrXT#bn=8Ylc3qt3R@b=!a6~qD
z+h71oV<%tzNdOB$1Tm^a`PbG&p3&wQHp~0q6RV<D0wNJ`n%!;MG9Qe6#!tQqzJ{|o
z606usHkyy|X@HG9Y9ueTKk%V)j%`SGp=2W40F5%7{jq6!XZ3U7x?eH%^(;9k;DT(<
zQGuV+PupSDT79W#m|Wv(`Zx2oW+rj2s?Bh5m@ql+=oLrQ+<c1lBBc{ro9~I@<m&As
zksBO`dJ)VHq=YN#vlm;Xs=y?{+QecxL?t90QGobHxKA|<6s#@Y{KfYfRR(Y3!fG-X
z+dX))D4dEP)z5L-%8c?0qKd2T6F(MP9M=l?n6w_h5<@|A_@cR$EGXTzr4rCxo%k7i
zqlYoC;~5o*5+7!>q$pI)p@dIe>#jV;=SI`2lZ_H*aQXYI!ic0Y--Q?Td2#V!rS3;D
z@y`7JN~?QEDovUiS*14m3d1+h(|zN{ia<tZU%-WPdh@3@BP7izmiETzu<1g@ZCi1-
z031(GQg<cllKasgxPnf+4a=);H6(uV)-Fs?Vx6=D>1jN7EZQ@|&joW8bG|ZCLJyT*
zrM?ePhg8bXCMCkA_zVRyy`A4if5DuMrjczg=N249X9^D=H!|2^sLc6xsv0;q*V+$x
zh1Yxtli}R)uYzs<b*V>Q62g}u;3Hn}qv_b=r=a5@W$=%29G?6=@prj}6hVYUQlfcA
zq7d<VJZ4?}f=oAdK9PHm(d@_W7%~TH%mKJ|+|dtRCH)vfLxX!9YWd|<SoVPW`(S58
zim`?<dQ)vb%^j`EMfNuMh%bMq;A{)U-27J<Fe8<c^(<D+t`<IErcxkYRzrNVe{pn;
z<Z%CkTnI47ELKm2|6M?V)(7K>9=6*Ik>m^|6{@i!@%FFq!%|`m$@woFff`yfv&`$Q
zz<$WCqF9f|+ufiqVci{)+?{UxDrV5YP{samE8oNNa@1};Wjf+=S-btpgyQ~^EfGs#
z>noirOO0~DSIvA>d^!i`FbRky-1E29@&S)8PqVSyY?ICNb0@D#?P_MP@2APgKg+-N
z)B14uDSY3VI_66Ry=LV533F#YB2`;9p6>S*>!f2AC-NwqWdzM_gzoXd-ke-)dVCO|
zg!_BT2g5N}gz^|yCZf5rsijaZ`%^F(JaCFvN{1w<aH9M7jKu&U%&&ym{NA`4Kmfq<
zjS3i!WsIm5o|yB@3ty(r#1d!8gU|B1x?~9_<{?AQIz|~F#)7RI8!J`(vN<rk;JUXJ
z=rVWfZeJ?XnPRm1*UImkNx3AJn-gHgX-5=GrNKz!i4|<Vv(khxssrWhsx}X0ga&sg
zG?QD7D<%Arub8s_hQE2plOp8ppw!zG8KA0k68&g$B=|kAsfp`b9D&JEOUI1YQOnhb
z@bDJ@U}G^cb>-@unP&f%2<<A0%cZEr8Eg4{nyHSsj)i-c$DgDjged_}7_1lRhRIW$
zFK2J(ZwGwdpOx3iPoiQ76j;3qaDRgKLy263T^ZTl_vR?X?V<Y697!Ceez$4erLucY
z*yj!dW^lR1r~en$ca`ZV^YEcLuD=54>2m$tszRh8&dyjHw3Ckm87C|r<$nptK^ROv
z!sPkWQy_&wKVrY*gvESUoNtfpbyf*k=i9l?ZsT}wtuvAM1Ygw@lV~LHOpzz<xvUtW
z+Fj2y)fnxdEZee3`0=~9g7?+!%?x@F7?h=7-+yvo`z1eL_q~f;-a}CJetfv17V<vp
z9F0>J_csuq-3cRgOHvkxb*qmaN+M9~!hn-}qVhcl24;W#?ca~_&(lyKN$LjtM%kxg
z311-C!@+oqzgPc!j`p2pXlCxmEPmf0EILJbntsi&de@%sf4>A`RXoSP7ytcL7?yBP
zXQXub8`<G;!50J?`)x>fKCQa#lS}B1fXNeuQ4#ewfSI+>Df~|xK(8Oo_5Z7RTUn#4
zwiY}UbGgMnsgi%DhgTqx(K6)5W4SoZ(~(8_5iP*eznZ;u>TcxqRKktVjF(B>n(^nX
zXO4<6`&dvHMyV$%qBwuQo+wULEcC0M=c8bkBOz8rXnL4T{;I~D$))^p(Vr~SAOWEq
z#k(EI@xu)g?;c1&y0<H1ztA@s9&M`33oMzU!Oax$yS+d|CEOt0<(|NjPiKX7-Pt*=
zE3!SlW)~9iZ^C3X<j>4wa+EaED8f?6ZiqZxXCoLbz`GY@+#(s&2U_uD{r%?g^J}KB
zj{WG6hzh%Jz87(K8a$f<Je6uEs-`7w_gxEwgwtT_rJbnLu%0Ri*RVc5iY8wgCUstJ
z^^i~~n^R6qbCtKMH&$^xhsS=4pSjyN^S`5q0;$6pw07qiYBXIx_^T54SdeMQtGDi}
zJx@+Eg+C?P&X64<;SFXEDd)QTX<=(xrOFu-S2eq?F&3y{u)5B<pzPzp*-*l}x!aGA
zjAFyh1lJt2We7;HqAfCP{{pql{t;V4<Qb`rjAga0zeWx~JfL1B!|p`D8x)SihHK>!
zy*M{&enD1uun=hXrcE~Bq2Kan1$7Ww&85MdGI{1zspv{xLgiZ{ey`o9QDm#~q)k_O
zwY&S*#aFHa>pF%liUofO^m`BY(*OBV&t<?%(e&8HI7i^U#owxD9>b1CoKYY?daYBl
zkps59v9qF>5}$@Q=j)>qbFHSZrbBU2@LtiMZDyUvK&Wd`eI(EC=0I*boEkyIS(Der
zQnvH`@SxTsQzR<mg+XKU*f`O~dZW)8QDaQzyBFo>CqEH5^9_|e0eMEF(jfcQdnJ>G
znf=#qTtL8jb5gYM?r8s&lw1O>LF|*{iGgRIORU-G81<@=4v>}xey!Z!wjmzYDAr$Q
z424~+Gbx9m9nCBF6LRfwsbiSX7kwlXs!HZAH|QKLfh>so7%Vb?yfe2Jt}5^+k|@g6
zwB<}f0OjH1TBq}ACU*c(taMJF?+YjkmA(La6ku)eO$p%U8+^Rz7o7Ye2Sx<_&JH3(
z!oG}qf&eo2e4I917aWLxc(}!G<Uh|bGNB!9(vT|wCI{*jsv7bBq?gZ3mRlV&8MP`)
zrnZ0V<UHQp^RT3gghK7C0Bbtm4QXjYRgvlbMd322Z&+5vvqYtF#*dlzbTl>d&3>BZ
zB<KI)hralDp!Fes?=CgF#B4DHi6}9j#<%AEZ(-PlnL4=8%*Hbbq67b|%wipDV%^Hx
z?`Mt=E<Z=Ed*fIHP!Fclud2K^tT^qO>{IF<Ja3a=RIq<9NJJz?-1XRZdWL8mJ>J9*
zpClTxDti>PjpyVV4I#m22ziifsGH**N!`=3yw40-iU*#VUyv&-`|E4PGWG5jTJ@BC
zLnAYt|N6Rt?ERZmE|ZpJmXA-Zz^bmj3>K}$5T~j?gUgyA>~?p9e4@fp;dHqv`3lvu
z9-<YTp&z2sC9h-ANkyKv*}y!I61d;YfM6qGHCs>Z=k~@&<heppM((d^lqYV6YCazI
ze=byx`cIy?IeNol_ngDc(8PkXJ|hulo+ul%zK@1)j41>x9d>UfO$bWe!9=DeTdLAE
zGH&jJ9d(a6gs0j0FVw%zX`_j{FepqR6YrL1(~y|7Vq8Nj?v9o?w%FA<Z~#`36ZM3N
z!C?@INaSY>5Sq&^mZd8ne!HF>biV0efAF3Sv8^?_c<DAnNjh92?G+-QX>*IwH0Fuo
zLLm)D>U+n4u0SY|QMqGdt~BQv%ZlW`&SQe5aH%6Nve7GTOFWsg0vPzfbH}0;=G{fQ
z?n#ELL^Juw!Lr|@5e3JjJA?Jua>W|{%w%WM5L0mtI3fsX!6&lb=D4W9w}uchTm1j<
z_10lgeQo#n3@MFDHyD(JbP6~U(p^%DbV>{$HHd^F0)imj-5o;=qKF7XhcrqvfJm48
z_I#e_eIK9qdtJZ3ye`k-oH=LjefE8?d#!a30N=K!UO_Db8<ROK2ivPTSx;^_nluW*
za$F(?c(Qzx!^x@&qy69BU2+)B5gl`ynYn4zDC*AVy*Zl%DT6>u$11*@5pv-fPmDOQ
z1#0^X`n@6C)U8qX3*DTUps=bjXzQ($aVLx+^D*2!H8>{4b2Ix=MRID_laOrd8&e?C
z`+m8IZ6Y}{W<(KRVzB_PO4ujpvufg^H!jVhdPB{6+2O(^JMTh4_Fk>kfs)j7cLBb1
z@rc`fDOzDs@2_|=Ax*?>k0*-!obGx0Qa~^bu4do%@CN(ic*R41ByLIV)*f_~yJ>}+
zXqYxEp14>brxyI*aVYXQT9nExAKy)SVYBV!zN0x+eJi$sMM{I0DnZx_4RsnQ_8jkO
zmrIv=_Hw}Z*P`(L#nCI62JQ~5*n@hl@2mA=1Pf}z93Lq<F}riL{VrXD<oBcI5THBg
zIU}$-?{A#f_QY3&4qXqtl<jnxtWUa8$@lCB>oF-#K+qPY)lt2Ve1^ck0)~G!TX`l!
zu!%Cwwc?3*L*YcAArcf0l3HEw0jBQ<1d(?g(D(5nJVN@?LH2T?%;$?4+-ioPd`*H(
zWI{HOG?QoHW|348WT8Lb@G`#>SnsX(1j-?k#@y6Rmqm(Gh4Ncdsc81z*=8mnsizvn
z$%4J{+ENDSk9AMvBz;{Rj^Ul0&rXfMWdsRPHy+4MlauQ88fteqeMdYdnh60-HE%a{
zYkyM{cMo3wP5u3AguK*aPbWtBu%vfmHZ`LU52v2kDX=2#DR{`Yu_~4ZQ4qr~%Rww$
zV0cK~z{J8qsqUL|)HqJWtwz&Hdy|_>c)kv5^NgC2`Legu4~4sj^_w-l{z5!e6;6Xr
zS4gEgb~4VtInf%*c=fJBopiqTFP#zn)TFx?j-w8A{8iFE8+j$!4QUY}xuwnC{uTK?
zdd>9N)4=kh$1X)2Io_9S$-3NtSLMmPs7OX-rD!gdCimd?68AdjnyECP)v`}-Kl7`z
z4t~}tcodj<?7uP8Kwm!nEOORKMYcP69KoEH1?jyaF$1~9*=K>WySf|gsE%`mz$)os
z+w~8uA1D2Fn~Wb4z~fi>f7I!sOwv5_BzAPkHDIeuRmVWWu_Wdp6YA1xcS1sw&HC;p
zk+-eJ1r*(hH6>JY#f&zSK3gQ;61EaV!k}x<#-X3f9gUVABd3q_wucBwX#*3>m%Tvi
z7aUA`!Rc@3|HM9&_&=IJ^z-AG;CAdfNtGHKV|k(jag~#^O~2sM=e@$wY!h<=CU+Hl
ze1gGt=1$Tlu>`NZwDSsZ`ecJoZ{kfV^k-lAkC-eM3?mGEi8pbi5tMnw{J-X@JV>&1
z4x|p&^HsM<jjKNd$LOQ(>e=pfOgA_c=nC$HJ;$?R&B`3lgKjD`Nl%yhJLyJ=G26F!
zVEQvf!-8W*74zmc3XL}ADcJPN4bAhFpa1^tPE>oRX#$vqQ<oH&`R<J{FZ6`S)_G(L
zOwfc%6IW5Jw*o86WqmW0sAq^dAW9bdXSScyk>yoPwWdm$j8O>t<W)R>bCwf9)4g9h
zUF64QMR>%)6qVAo{q?7^$Z_x>@n)kZlZwH6S0A_eynE@Uweh5`%ATv@UVRS;;3N~y
zw&KfQb0hR`yb=(sR}bsOToHx`{Vu7qA)8#ECxo=Zm!E{UUdekcrmL-o3zs{OeW^h6
zV<SH*EfE>C`WRrz&m#?`W6ZPIH)bYE<|COiu2}E9BETe8P!u>z1}!-;>hqunraPQc
zJ3Zg}knVlCXWO3<YTCG?YUm>U^81&_Am~v>%yS>8y9+bHFF)9Z*i6M0G6=BYwIWYJ
z$r9PInpH+VW2qqBw64j>%l0O!in4E9_WXOSA3$?Kgd5*=l@=Sb)55<Bd5YjqiaO1{
zU5q8bh)l#0U<yw8Y<w6t+SzBXIb^FWGL>(<>U`yM(R&MC15FXt&r+YS&+oWqZWYlD
z>7qs`oN{h_>TC|LZnHbBMpA6oy9S=#-G^4;HsQ)}>$IeK3<a)-pBgq-K#pSDOXo#E
zYEh&^;6szu1-J|j-(NIi-VD3VWCeE+U?7L#WNR?!G`V)or6hE5|HU-F??{U_iof+_
z-7)03D@1-Rk>q$k-D5f%&9ZO8T$d#DwC|}{Ro5{h^2+GR^i?XApM_^SQB%~$9mC5!
zyP%hwOo3a+nHV06!=6w<CV0MuJjr1ZE5eAx6<@rn&2^DT<m(MwY0uRpNvFeD{e?6h
zS!$~-xAz`zQ0JBAr=QnOI=vaHMmlx?Sq1ZUI%r>}i`R>Pd${KDl`7bgfs1U>YSSkl
znowslkHr&+IbAM1wA(C-d_eFp!ycun#@&6<f<g~UDo4xavHnbB(qn!Umsk;^=Go4C
z+n1dEO_-OvxiU9;O3QCAOx_9QDKDG%?TKp_)tz$S#w16K+NqSA50fD3_iMjZNo{5K
zP}Jtm-(z|016a4$Cd0oBEfZHo<ucN~a(IL@sgYJI-(2kIQR3unaNXtTkgb9-f~xac
ze(yWjM1y|aQwM%sxjIV$_)@mR%WA~5L&?OIW%Se+1iD2*v|>4YRI6#fH6fML{Yb~4
z*2z#M$ym-HW-GSjto}iB@VS9?6byLuCpq4P_=<J;Q{6Ir%O%nxNRSrWzLvuvi5kLd
z9QMG+#8$i-m7f(Pz-Zf%zpb%w%qZ8F6&|$>9f@+vXP%-Zp3HVlsXkeE!Wkr5kPIGE
z8Xgw$Tt+kgkPw*wxTqvYU{YoN&b3>*>oKt691q%t9mg-R!k~;TB0si*`vE@yQ^w%U
zW-ifs;Dox)T>&X}H_N^X<|ZoIh9A4jlchEi<74&@aE-z8<WHBoj)VC)Q<&mz%xg^i
z#pK4poA*%x-<Jy{F@ggprMTR2!$aR@-w%-LCLF+MC$r;TRv+aRNd>FC5+%UYx%RZI
z&iLmsE~AYQj3sPgG@JW6{!#nRdNABLd*VI4?Ev;h_IxJpD&Kiqf!#t*3f5uYgH{G_
z26ti7(V|anEEe(Wv#H`Gi;>J^ERt_GCz)qzLm7>;Dy-EbAjl7fW*MA+Z`U%qV7r!?
z>ho=-g2H!wJr5FT46Gu&WWC}Z92w>Hbt)RwJ>afb^nTc!AXOx{4IMOep2;3&9vOU!
z|GVOkM+TJY^#mg^<VFO)i^0)wLRwIxO`BYQvQWksf)5Znyds6@@&V$tgAM7(*=Olx
z4BDx0%?)-h`9*9e<tu2z<uxj<{w#bJEo+S;2SBicEa+iZc=qFak#qs|&xsWW(#gdj
z1hGUz9RvfN`DNLHke<y^V!|P#f77pJ8ADAHs9&<}Q6XITV7bMYETw*Fh4KoYA~m^6
zUN}Gbm7K0KpNQufVz(WCK=0!~IDT_YhDtfF3q4)ZUs()gl6+4e;$=Wl$xWK)bwlQ2
zFbk4(J>)i>KUI?`G3K;I`#nQFj0tX^xF8*v^L?*Na%8;R(oH+Hv3f6bUmsC8_UMpK
zCL(pb+;)Ni2SLvO6>~EvUXXZXIwiW%klnz>_C5$QIi|xeg0cnU`?TD{mH`jm+usmu
zSJmlBPRlKyY&d<DIYx9MvlE~G_!hc%@^R?LfMK_)hz$<2R3!>;a!CQ-7$vQg&#TMD
zs?S)~r;<b!tOOPZyQHIqb=9HIk(cQ@daF-fZQ@Ou+*hQB{SUSPAchdU5`bCYwd_7~
z{tkB9$#BKTEmxt^p5Z~jFrjlOLCpL)yu0oB3E?F&TfSuth>*Gh@F03w?mBRE;OQ|t
zo)W|Zdb{)MQ&pWn8CGqmL(F%BRgE`JswmO@7!RZMi3;Im3$sws>?^_H>d935YBu`~
zJ?Mc^zn4DDX%;{|WrZj%B@qesy%bkX5uu-8f1`eWvduhSFkL)0^DM_YXeEQy1zq&{
zs8IEwFNIGz(h!nq-!t?u->k#Xj!7!z`bsP3)#xaZ(Jv*uz^F%(`{8ijRdbf&0q9_d
z&LyFCA>(B+)x2^Hi}tl;i?Pc)x)e1i4FIUH<&d)fJ-P22fXRJ-_N1xa6=X(E9XUde
z)NrIdS3*u5D*yOiU7>;8q-sl|#D7wX!%$W0?e(@?PrYjInpKijqXiBB-&Y#N0!H3s
zHZRwVz$N#cm>y;Psz;bRn{@QSs058d^uEMs|M>K(`001zl{uaX<y!Ei%rUW|xA=Qh
zj?2KP9JeBO?Oed93|J`DU4z1}a^K9vf=^~x@X72ucXHw1IbF3j$my!FH1Nk@|4ecx
z4}&0IVvQSh*xGs7Q2gyXRb^U#-@{%XTt?(IFYJ*z_)D62p>c|vDE@J*Q6e}^9UM)6
zb^oi)-<U=u&SUU&oP`|xT;PiQn0GcSTqv}VR4*3!_;mn|9UUCW+y0HE$){o8Y}#xK
zp6D}dl#c`nXBu#Ce2uef4sN>}7-19m_bndeV&8|38U54~5DnCRNT?2C1J4x{_4EZe
z%_;{RRi^@;n^^Gge?hV<r3eTxaD1kKO=}+~-4RxX!V`ydl&*paUReoVWn~AU5bl4@
z2Z3c7xX=&G_^qN>Ah48}=0ou>V(QEEsUml<Fxw8c>C<1gU}M0JYV?oZK^i^qUOwl&
zIlu)W%)z6Z*Qvm1*TGS{9ALcp_kiTc8v_LBYjGqAxZ?W5FHir63lK$ue3~ptTvhw#
zt<Y=m7Xg9byub_DSo*(VOiDZKE$tfl6*SBc<{!U<c}ZkfX_KELAAp@$0a$0yzi&5G
z1fJ8L$|$@6yZ7F?ZBxdof-o<e=Sa>1r+olNrLTv*`}X&1qs0Hn!-mq-R5a7Z&Zx;-
z-}-bA={8;oI^e8@O)zV=H?jVyf6p2T@T!p#NN*y?A7F<^!?ADApO2Rs{|6HLM$dpf
z=-;=9R07YYnA;>&_ZiGeySv=h7xFlmCl@qd@c_z+dH{|la{)Wfzg7@}i_#E`qX*BA
z7uYL)<TtT>3mzyp_YDU)%hD1Yv&5RH{yl3xnScv@{$8%8eVrD}{RZiFT%?^Ai(ZP?
z`r6sfbzqZr0jxX+PNv;D>W^nGFODov4^!RNQtj!adEzh5cC_|S<_Gp+?`|qZoUCRB
zbLr$f(D8dS!>ENF11)jG#9M#;KzViS?7yv_X!pPX{1_br!R;B=t5vB$;JigTr2=B{
z0@mQ(>otO<Qt$1Lo4EDcJw$V(0)HQ~n|B1favXJ`{Q1J!WJgPZ5oc{~cKRH{BkCBR
zJ|f?n%<Gf4@?*S_bIf7zB}2VEbMYD3te5)$IfHmWRp)M&Qzr+A`)UhCdIIY{XB3l(
zRQVV=j<+z6xs3MxuvUhnm>=PM_36I3F*5%?rOx@f(3@>Zpe)1{tu)9;B;3xr^Sws-
zP=^L7AcYS5e4F-b$Xk|4wdOtAEC4xPLqD|Ub#fdW?Ohr=PvXy-4{C$Kl>AZyV)FT^
ztn9dN*)aXRey(A0_iGCUUpjA*R7HiR(tPDs#={i6Vb(;x5Z42Mfx{0B{(iKF4ZAuN
z&iR&!ZGQc_!}sv5xHR)6SESeC9kPv)=-d4!Sg~g=aLgO5@$&@Qd^!8WCzmQ@#DaQA
z1#8Edk5alvj4kV+)6@R;meU{m>4JkgJAX=z)W$c#;WykzrVNOPC&}P-`5wgjQ(o0%
zx01FcUq_?R!>b>k5Z-x*``xFZ;H~VbpnV4xELKax>Kg2S+m|Mx3+Ji0Sf!rld_6MW
zxDP%Sy1L!qWAVSA6gDVW82nJz=S*fbV7fOs#HH16qqc#$q}Rr=4ueEryyKW#pVlxw
z+<l}kV@XlG!d!>H^&g0JUHEAZ`^EEN(0sD^`PQk(M}LNk-5-)CnyK=%=nSlUL3f}X
zm)kPr<6~P_`_`OWs(Qb=y@{=4v%gTV4kpKc;R?>?SXz<H&@Ft(=tv4tLUf~mCQPwc
z|NJ|EU4}blrHgCRW!ag%4&M1;P+e{Prqi(o)8qlODK;byJ8#Q3Qi6-_kEM(vGf4{b
zHgKe^H+_!|$@nZ6V1#hD)n{92X~nwrrZU@W+32UU?Yz{y2Hgvq_!TA!)8cMf_<vIG
z-?MDD;-A$|x&826f@s(CMN`%HQe+o<NAq>gf0(4|+-PrU#l4&I-o9ER-a^sjxhK`R
zyT7-4LtlPY&Z<YHH*S*?uKAb(Vz_X>Y@~|&zz{7~b})WW8ZaQRI#Bg(pdgBQ!2e>y
zX8%v6-1=d(lh<+*iGI7Pw+Dkr!n<2qkADZuwFRo(8);o%>kI^5A*MC!j>@2jP_-Qu
zXs0`lkt|9YZZ$1`-b#w8-<JFs)b)5Gzwie06fz#5dMfG8TQDZ8$us%O<^4YwmKc^#
zcJqNp=3M>JTO$(^v@O?#3(f$yC4u~ie(nh$8YWAhqg<!?#s$;j@KbgSX@teS`_C+E
zEnuKNkT4)H7fqTh6u)^z;Kg*sizz#adxZO#PdmM*25JtYdC3VPCLW<XwID%kOybeL
z=eNzF3Q)6``nK{W(#m$ZI|?^Dy6?M!qJ{zfa?pRRpDOJ7`L*&TJ0GkcSSt1B;uIW=
zZl0=!q{oVGaNj^a0H=7aBnsh8zUMItX)zJc2K92ii0!tu{a4m?cdc&qTT-h4hmaeH
zqFv#0uF4m<7h6(6A!@w}-hW1kOlt$;UO0a7mEqIw>KpX^Cgyx2ueri(N!i?AQm10Z
z&mO2hMsqX>!`)kU7moZ#b6tdccdhyHb5gjI%uRT7Tc4^hOCvhKx8~j1<ip;@!*DF(
z%bkdjfhEMF{U*6oi{<Byj}%CoO2$hVaWD*`ZZ2NT(~Ztqo{r|Sn6r`M>&o`U^UlQg
z0>U$;d)iY|3US|aTYkM`6>)Ii2t}P2CFV4iII9)4qG@M5p_hLWr0tLs|HXIkY367B
z8ub(HpSU#%+WfFGcK3c&0$1xow2hU|$q&hJIYj)RN6C)vcyEKbFi&<jR0TIcCf{oQ
ze2j5S)S?bxv<KeaMuP(4{b_<JrVX19%AKWBWip8}U~AoD-$<A5wFYa=-H3=i>hnvR
zlj?PW7!;c*Z#noAv|o|faL=goe3@Vik;asl<H*sX&(%{WPHNGsT9@i9xXe96Hgtt<
zxKmHaPFJndaWumZ9VW*6?*{REiGTaB{pTCaFB2Z4Vjd)g8~~SxrK;V!_XiJIA;$-$
zNlS%j8no8wKK)ngSlltUCyM>$F1}i^1QwOgJRQyIsbDP)=`DNh6pVw@3umo1p>m-M
z?G;_c-WNQ*$;epMq{<$D(aEoG#df4Se?CwK@d>$sjrxC0r%3E{>QVZ#YXkD_pN#h1
z`x^N0wo~$)pay2TKPZJlNsNk3&vOasm!~(FG9@XPi~n$G(Y(7xQusm7htah98K(_8
zCHl|Vj$Dm{Zq#WrnH^yU4aB^=J5+7E^)xp)Wj?7+N1`U9M9ME*cLy{RSCrP@Y76ZR
zJ2+pzPJjo|I7qEq@YdJgmLyw(++XfZPtxW6LJP6T&bhVredNp1@^;RQqG&?uH+H<q
z&a3v$%n&6YQetb2`^jyT(MtO1H|?EkuDJLvv)xSaRn7Fg^}VD$8-&+~2}&(DM}UkX
zx9S+Y;+2qk#P~31saI~Qw>6K2s)9rW)RE=k`X4u+_f8cOxYF`eD6N;)jc`)?h1WQF
zp{2y@17FCu?6R5<1|7ns+*_e24tM4SPTd%d<N?dz_S{5oUg0cWPy~yCB9d-82oXde
zQXtL<Sj%IsTksdE0|e*id$ZDx29-j5#FO_)Y_)z8+%WBz8?^wiVI+I4D@2oFiqq%|
zi{*hC-3T+fTLW6k#pWv~PDH;xHIU~W?!$b@>Yvqy>&vMvPI*?DdM#?xD}>n$IOx#M
zoxM5IS9=v!E8>~xe4f52?pgDkmbG*$Nw5DnTxdrZ3SU{jWd!2=|1Gn#5wC&3L-Q@{
zKjGStzLlMvm7qB?K&SbeRNTPt`lToHp{bfOf#;D<<?96=KVr@|yf^e3m?U9{+3YrC
z&$q5yeR*fd=!F$RQwW5fkwTDc09QmFugtj5^LLuyjnO9eUUqLmEO38>@NqAJ&Xvyw
z#WDx*vsix=>5^Ce4oYTCiD@ek$C2tsTvkakeyX{fMf&Bp#p57FFh@J5_c*2bQ%ZG@
zx)HP2>*qQz9|;WWyO|;T7OG*jEsw*?Od8q!ie!h#AaXl#u4Ww+>*el1Xk>arT^O2o
zZm7pEBS8}?R+bGi_7j#eT!(&!s<Xw2o1@C<_@BnqXeBi~##UxWziwP+mmUUOoLI3w
z_jXp?^kG@4v%oV;ZLF}&1xRixJ{Gt%T-{W@&pE3^Y;js{JaFc5+324=9uxR7S4|w^
zPPTPjes4mCw%d!aYNVWqkTZjOlXOSw!uHxW`j96Od3z_=7p3WR@A}-?`AUCj>`6{_
z?@aMrol<8vnvXFy6vt<5^MU1c29TBoj`XblJxY-{N+4|HjJWZiEXkg{s_VL4kw_1+
z(RHxW$5prr7FfT?uUYqO;BG}Q!Ribx?R!?>3BrRnnY}Vo1r6k~nue+#c6dpQqF+pF
zHWJJVtfJ|dcYu3Vc8VY!F5LZPwk^#<6pa>(Y#x6(!|+T#l&9W{19=+T`%1#Yio&Kh
z4XC<EZCBqTC!WIbp%Abn1*odJyt~JDfn%=b)~A*j<~gl<#QJMK6QT`H-H3G8BxAD4
z&c_ouaQtwy{BMB|tMQ%B50P&4PaHz_pV+5~TPn1aTG8vijwzmcRvLQf!FP9-znTH~
zkVb41+mmAvq!cJAbip1)?%g9o$f%1*I!6ESdZM?IEBfXas1yNF2x{bIHz+19>uY&3
z!A+Bl&8@^ef6&1ZII0|TJg76>F^vaXa<j(=hkT)DFKaLA(Vx2rI~d2zSwYV~^wo0V
z*3Eq+UMewW;pqcY58oZnUj^RmC{0fTsnXLV;a265#0xoIWgFdZ0e<v!iU{+%hrGCd
zuOP}xY&?H?Byq1y8>CQ^iX-Dz{8g~F(u#6MPrkZmBY_5{Hn(bAwWCvo*}S(g17E0L
zK3fi_fH>>5DaBrn2c|hrHRzC4n2)QDbx7q_FlNio;}|gV8D#{p$Kz89iOrRK4!R9R
zf;gN=@gT0`Rf_9n5rYf#8*ObWi7T&&&txux8Y=lnp?P=V)U?Kxw=e*w2?&Kef8x_d
zu=L)VNNzdUW_)+;QN9v$?j}N7pb<yM-U|{~Mc#BZP6_ES>&Ro2O)MTDNVNiy?qd`r
z_~njJy%o57&JFv7-e^{F8pVgZFpMvl2D*=>8yp-cQ9LWCCr%;XnxAH?3MHw`bWyIi
z^Z$-7?(<sQWx6%v1$kPIU+wMEaHh_Z40E<ystGFjq{tdvifA_VWQrBh-g3EoT{6kj
zebzpP@AZv2+$C^^tdzS}KEKzpV?AHmG8R3yyE4hFOGZV$HLGB;VTMCI@QX$9)%zQi
zn-gSxorCwxWjuMh2g1ZEH;sbsmYju)euGJ3275Xaz_-!d7e&54r17XBzo5h-CrGu7
z0P{=M3KMwmxxHo42~O|FIHSVH@%B4RyeM?tPGGa@UwJ6V9T8y5TQdHHkInqT%xn6X
z`tjkuJP`63Aqaepg$Rh9xPMAUQ&AG28^Ze^-Bs~e8%|7B&3a2W{4@4-5&lL={m!^>
z-~BhygbM|m5}{q&-S5?ALJY+hTJAHj=#tj=7*z>+>m2~8PMuud^N%`sToIC84ijx!
zmT<CYda+111wy^iXD}FU-lLp9hUo+vbgB%GA`JB{)e6I48lJhLMfzzn37(a<ZAt{<
zXnm^)N$WPBgm2G|*_0~=BINamuz=#Q7w%7-2L%sP-`)#4S1D4wVwZleYKRHmUk>ar
z90G}ANrzejY2hr1#oDisof&e0LN}SOr@T#<ieP;3L5ekx1X-FAA;EcN?bWHbyB*FX
zl*+;Tq|SRS<Hr2*CK;~u@)RzLN!Xcf;41W3;>o#4Z^A7nE{!+;#KKdb7Su%xq|Gwc
z(%z(mUGbU{wMkzzr)8;vxu|$;DV9r$oZEv^8_*D5kHUR%KdZL-2%@Pyd%`l;k=k8y
zq<Ypac#LDbmh>-v(v80}<oj4`>Hf??Q==_FWZVWgq<=;t(u;rMkV9BHpB;cio4paF
z-5vYuKO<sO+}B87cjMCw+32zfydPSOj<(wfXh%eg?;j80_P!HEcG^8|hnT2jj^Q}u
z31rCr)HfF~N5pgGZ@eSP%46&v51~SR;kgidbeJ}javISrKOJ!toSRqT00WR?wCdhr
zztdRR4<hk#c^5f79PS2V@nKpJqZJ+gig)~dH<a;_mo9f<CH4L&;o|}{;ZNC?OKH%O
zh&?aS{DJ4NoN{*s;qm?St_}mvF-%GPox?%xmyKGMduDXQ=8aJ`4K#Wa4jbO542ykP
zxwU_qCpypxM<x^O4jU&?Ych0a0K0kmKK|@KsU;RoQ9-0Ly=ck;NJ>+cT4Hi;K0sY;
zZ{&Wm#kbX9_=ZA{4@rt?>8;R2QB5v%n0a5bP~)3|=Ou1ru8~9GL*-R({;Q5IgRP^>
z%+2`Li2y*&UigOb>wlm>9rJad0n=Zd{iI*x99Ber>xh#QAA<2XKfeCHRDbNLkDY$r
zRV!NRgC&dqiY1$HAn<DdX4GEz@#*r%M-iL1I@sZ)!cUk6m<`)F7X27^4pV>dU+Ynx
zimgKDrIhS7EKJu9)@I*?`Zg9}o-78>IoT{A#QXPM{)0T%I7e||zrwqhZqzp~l`4rs
z6>nSrV7n~lugav(i`m9GysgPO^X*PC?f8X?=+N<E1Z$IU=}SN2EPp|c93|qFXa;0O
z!zaHp0Ni}^Vti{}Dsqnz%sQLGry2jfEfd`ba1GIuRGYY=JVZ0fH7$adTEINfv~HbJ
zL-@7tWO<0wdf&j89?y?&A_c_UKgWNeSU+7>)6aDcRheZS(om+r<j<UtxED@_v>|nC
z$G3`Hb8WBA+U|QwMB*k2a9rAutY26L|Mq?uf2(lr!mP-lv>}c~ycKk=Xi>?OyHo@8
zs{#<%H0^CIT(yXMp4iS3U0znKpjWCAP5nMKQQ(6bR$Z*HotrD9Ue+$2b98-kXR@_o
zNEDA*Lb2cJec^J5G-%3zseWD_ew!1;k9J8`H~z0tV7rIKdhhL2l<~Dtt3;x<KgHoL
zNe6Q9O;^d9F4gZiYcTuoKW4KhvTF32W14DkVD_%tf{i&%<mwlX3@!#D9bT+z*Z9D5
z$G4Ei0nB@L8rj(PuN|E%*xrn%04YllHriH^RW^!wBwnH0;zbYoVZMoD;N=k~s=9&y
zSZ?H-Tt@91AJ;jym=N-%Mi1P4&G_)(B)~p_!U@Q~i@#1$TZ}Gh4fLq|8@$Yse}SE1
zFJ|B-bzxMIS0>y1&k|QOnYE09B()S@C*=SuS#40U|Ni*sOVsL;v<r^8g#T+mRaB8Z
zBhL%KCmuN+`!-1gKkF9tp2r7$9Hk`57*xN$E6fW1^Cva2KNA*lxRtZStqZugvPFg$
z-TUh!${y7_nqc?2<(2qpU<-MW2N-rl#aX}>BliONf*Xi;6WFb9pY9F#U_j-r6_jm<
zVuNJYheFSYgT@N4&W+sXesyrFzE=Q{9-Wjrhpz>t%%atf*SF8T2q9*V+Mcr50KSje
z`x`EwaahM!8j+3b8K;GsT?M1a4&Fi=h7m2FR#3~=1`HvG9<Cihe%Up;Cfi#sQ~qfM
zzR_90=w1g{(H$Q?%$+T^h^Nsn2)NY(GQtx9bNP-mIT(Hv)s0ksxPF;FJ@H(VrX)H;
zFjm_0H{G&;{!73z83B&Y4#RX5EQQuFa|x!<k8)H?>yvHbWNY7FWyiSonKr$iEAit$
zOn>+#!S%c6N)ADPneWd20Rq4rQWVss<NqLlwVZxf$F;RDnI6IcFLMVbTLCIn+Gylv
z-#mN-*oedu`M0Dw?Mn2^Y1sLw{3+|$5yA6i^<*aXTUiz9YT{$H?`}TqwWHY#tAJ!(
zO}~MeMrTE1oH2mdCEe(nvj@Dc0ap0c=Ga(MPs~P64rJ${>CRYWC2jm1z&A@PPn9i3
z0wg|94?eG2Gw)30)^4M1VsY&UM8$wW-yORLmRAQq79+{9WESuH0q4I$Ld=sg-g?>Z
zXx=;WO<ai-5sQh;pw`Qb>f$d*NL>9JVaJv!Zi9~*8ZU0?nhq)w=PDFw65lQK0mZt7
zV3KaY)3r46ade%{IGy%B$C+)J96kYUKBHBqNoNApLsIEq))|;3!J2DqHR&|?roD3v
z#nK&yX4XkJpG%SDQ4o`>8(P`d3_Rh{&yxs!!6dhW?QkhoWChKr$7ZAQBW=ohjTUeB
z8%rv~?gAJEeD`N|#V}GI<+=XNocm3Ehe<X0-`K^dT7D`{g>MYgeL%C*seWhk1}y-x
zwENvn!8=C3>6YXVQjNKzo$V8*AuR#BOp+7_`fTg0DS&~fabVty>iZA`gtVme6vf4e
zryzP4DjJPeqeTyA-J7-FIt*RfcGenaZV8u{{o=Y+6gZKA`CaQW@dhg(Te#}JIl6m8
zB=|r{)7!kcC#L1lBgd?H@7E0ENorAofdu7Z39i{pt!!jAi`Cq2*BQ0^MU3{U^F~u#
z0;eT3<zBGliTXi7DlsWeimc9h?R90c8<*w&>lc~9VMDn(QRBpBV%e_a8eBee=cC#F
z0#boL=v+vRi_K!D?Y}(_4&B4y8j1EUO$5}d?dOw6g?gNC2X#BXQw1E8ZcVi$uMA|U
zZd-G5XB~vEzMvWabHvw7M!+)U{*tuyHBmk8&+afKueTn{=x>y~_FW7?6j?U-_{_@9
z2b>g@UsYGbZ*Q$;?#$_wK7S9>)r8KNPE>w01s1*i3(wJ+f0EPP-|}}+P4>a`y2)2p
zVHDOoh1}emi%{v-2tY&kTI?IZ5^^^g=476Hqb&pbT+MYMivQZ@0H(~1&Duk_DE;t1
z;=)yMY5~GR&=Qja{KRWLiChJwzd-c?y$ZfsHrW&}WY^!Fp7P8^u5po+8R851M2UES
zcpATQCokMUHQXbrlPK}Nf}Aw|F9_!LfcXQ!W4DUPFi2Oe<FX<_b}c|Q0L>~!{CSb^
z<xbD;2#4rg===`7`l}1I=0nr*@;*d`^JkuOQb;SdV@&8CW$xaH7WJ6cD?amf4%l5h
zc?Nlww8-xL&^@gvU>81gg_aXO)R|qx=;J!eCPO0V*jE*z6vDh8QA#%d9*e9X@F`oA
z#y&|`BKkx+Y-H{*d1x5~c4X&rzH?Q*ljZ;zDj5{3Oc2IHLbj{g;(Cc?33okd;Mb(!
zE?>^b2vgcwAaGD-XQQwqoNC>{Lyl8Ux~*p4SU>XjiAJmWTtMFZjS?yC(cZaV@m4k=
zwDLACAKt=U#58ao8T~Hv6>$}+a)ep8X85tryeOF>;m`h<6)Nz|w%#uWu>Qhc)99g<
zmN`U!-&@<Z^F_HHyu&A$5OzJbQjO;~SJ{%4xBzrO-6=x)K)GCE%xyq{+g5jJpIpVS
zEu>HT@}vF{Ed5*MNhqiz7Preh`mYUFo@x+V5o99LVf#(|k`S{%Q(Nz#>Ga}A50LJT
z+1*ds((h_a?Jq4aUR&wBm{l4~e>7q6+Ugx8s-XFkq05{`df6XVB?s{d{q381F*uWa
z=B(=9J<fYrvFXFPVOe2Fui}bSz#G-i7sEGk&^&o-A3G<qO(t<puSOpMKgdwjHt1kH
zT58Q}+Oa2C_>coq>T2H6&Cg@K2kR|$UBl&>y-ng8=&vifOirpLCsI6!BRUW1=+-zQ
zd+0NEFLb)!(>*xc4c*rA5(nNURrgexjtQ_6%Z=C6K))>oliJk<FDo+8kNG^ZUKyZ7
z>J$8yPgkG#qPCafLPb8-yInE++~%InF3Ui9bseRWPEIk&j_(aBGj*t7y&hAY9g6jF
z_s{gHNfl^c0xKGM=JBzXKqJjS{A15uy6XkSR}O!_GOZk!!DTz{uK#A6o(o7k)xtWd
z;&UP38*`UZ<PeNm;k_zQF}PLlq94i(cjsp?rC@q@6pW<Zcmwr7o7Z%&ozWXu6}CmF
z1iR+3NzcK`1_osy0@i8(Ddm%9-}|?e;(z~1z-EqgkvMTiY%*4b%`*Xs<zx?;w<dEk
zRO{=0O4)P~>}MlDBfamJOuFudoh}hmjw;EcEzex%YeEu_(N8#y0g=nTFX8)3Q;$ec
zW*ljp5Bzq}x{uUzM3HOqAUN|zIc$seCnqW`YlL}(e$wN@O)vPV7L`jlabh`wvvWpk
zT$FS7qOMG?!(WXd%zaPS#~oq)LZwsG^bgn#=%8Pi5M;W_*Gs1yAkOaF>|1p8-3xGM
z<7PZER`K_slaTtX!N&SR+|MH0ws2B22$iT+;k4)&mDSO&&Yh(}e22U?13sBY85CKX
zK%`mZ*e^9&?qdn!FuiAWB_p<dk0Z!iy54)b|IDy98=&y~^TTytZ@pR>=IuF_p0xe&
z62LFpPftPfBB_FfTf-i-72nl`^@+vIT&Daa|BWpTIzRAfW2h2dWdaWe0yL3R>@Wcu
z8(RU?1cGd=oToDz&^O4eda&-%dv^`>O}Pz~R1$P^*bvG|)|RP*{~z=xbWffSHF*tG
zg}-U`$ozMr^Z=TVt?U!z$S+Yx7KJnG{?RSaEZ4n1yY>ULm~-rbz;BM}laViw-Y;)7
zB&JF~Igu!I<=3ts661}n^HHuUiKkHkMhzSSK1Oi)YyyUjYG?+jw&<*dOmJ^(Zf!|S
z#QoE51oJT85oWRkGD>xNHo|F8H~$tiXY0=bBPXkez9Ub0S$@{!-r<^c+k_`?{*blL
zp&7naZvVQZI%4W^_evoJ1_I_=ZgELW!Tc>lT=?R860cUfw&x3B()tO=EaY?S`0G0L
zmjtv>nz$OwkGrDkf~x<+1xS30nb#QU_7}nu{do^+w|dC=n+R4lcMDunC-eqG^wJCg
zwTa!F%Atxy)sycQ92FZ^c0}>;-Qbn?Z?T-~Urg^fwH}ITIMtStnLGQG*6ECzj12P}
z9LTHQIh+Q>xs7r(MbN!cbWBo2X-Nn$N8mdYCYpR)>A_p|`t<cA@0-=y06qZIydEGy
ze<8$|SxhmjaV?dKe?$iOFX*_tRuefI{<%A$+d+<2*Sz;3A%O*23#QSNtRf47bs0C#
zj!Fn9rF+zx??AuU`NrsGi4je#2_FXG{KF<wkl47z@cNhIBs~s1Lf`I*^<Lz;c&yD1
zs3@Dr+KBw0jgn0j%e{CP;Ngj5#Kr+woCKikLLZq2(2-6?t0)4z*pG!_<z0|%ObPYJ
ziQ(13$;X&cNm7bfA<2N)8-?l|gEAkxoQN+<nfwSsK6oJWbs#P(9TsMcWFZM=BG2U`
zCVJi^Nog>^Uu8Fk23_#w8MTy7IT{P#2ojOqfDlLbYkB^3kjkQA+USCQ$VR8KM_`vG
z>%;O*?qSn<L>D=*;x;V(x`#W8cm9Bi-|)p;yPwAF+5RfkNE}Zfb^R~O@@XHPezF&!
z!J}&0^D+vIt6B4>7RUO%1&mrUk7(gLF+<a=uVF!1?A;kIkD3TrRtx}V?hgfb5`WrH
zQ@7LS_S_%cub=@ZZYN|8f+Y(+k+*lYXc<s~;h`Rt6R<G7bob^spw*BVWTwsR#DY=t
zf<Hfk1}uG&%HfDvEx+HMZ88uc*iLA>1)|dUMvleL6*F22K$%+$9}n%OD$<FRRmvI>
zKyW(}gdYXr(9r5xF&?m!g_)o<+ae%a-7AlOK`j>nq|^Rl?YGMR86OmuS+eB(&xaEX
zPh1?&YmwnHp6IJ@C1>S14J4|Cp7!QJjCuZCL6Jw_lq#lbpCHbF+*u=ljCP%M=;)61
zK@ZUCL^FG9e|CU-S~sP9?kw&1U99@h_RgCVkdg%%ni@1}mD<?#GI>`*1!0a{!?Il@
zuv2;@i}yT4p+jw-c?UlOIyq8cEl%)sZG*TeUd;Sd5XN>ux4;LeU#;y}m17mDz~IZD
zXP;iJ1MWv^T|ggU@u2U$w5JfeDYG~0AtyL<ICx#ncCJ=$V+@Q;^Cq9Tw1hl^CIjI@
z+ruuU6{d9x=Zzo^QJOdz#Wf|4?r~gMoe7Es38ZS+=OCx6b7v02AN!lQTKzuURj)JW
zLGe%Jy$|_YvxnVi_5jjs*KX?(!Ii~rMO-Cp#78#yT`FjOh36mM*?%}B|4&8&m<Q+w
z*-JuLgmVar(PWm((KWoT_qB%l!N2&@Pq2KgySM9QRQ^vS5@`$$Ty>&KH^R1WuPOO>
z`0!wOC}%TS<?HG3?v06^VGWX~idBc7b4Z^Up%SmYr(fSG9_2pqI$wjp@3uhvKKzrD
zU;Y!&;Qyb&XQSb!G(wmIhC7kFns{#lH|$_@f>TRtKpR!+vE1tgIBnV!1^NdCB?hHR
z@wYW{o_v0t+=ffWAd)qU^ciXO+bi<fnX{dDTMM@gItf!v;8GB<XkDm(FJPG=fb`wv
zpaxyxNBNGK7}(yj$f_EY=)W3e$|BjnyZ@$#)c54K&A8|1*VLoUo~yC!EHb|G?|BSz
zK#LDOZEeb}srsl*kNyl<X`z@`^+z9wF@Mg^lNl+V8Xl~*zsll&RAydAe;o#RzL`-E
zxtzQAIlR{a=j8qdsgfh&it9&uzx_36mzM}#T%<Yig8HV9qL^KH=GY$&j*@To=nskQ
zhKTfh>i4_<XKxDO;-XAK%yJqC&dkka7@d7#4IT`^B?JaAwfgmr22;RQk!SQWAL18A
zOnv2T#h;6_y#cPJpv8~)Q%%oqEce8Py3RB`PR!kEoDa%1YxdYR_8pJAp`4R-d^tZ=
zq;rD@h<*FhB;z&Hr9ST|y<w3}QF_Znzd=7zpmBqimiCXiGY&k7-~35aQ^1MG#&AB#
zz|PlrPW@aZvT?uu6w#;F*04s`*_`u}y+@=}rcZJ22&z0lkq0jcd=QR4+?wpwz|;QJ
zp$dWnJCv0x!(&&g)@}o}p&tk~EQQ0<Ev9-C=!^<Nx|DQ4j(+2!4<7AQQb6<7KIHQD
zwqw-s#hiIft%E}9Axq)YuP||@$o){#nMftPHAkdZprAOKZgDtCdLzfFG#{5u+UkVk
zw^91D`t6oA^b}yGh`Nl856Ar6t@sx*Rw^}|_zyC+`{^HKY<vjUpWEjT&P)?dx<NgE
zLmY$pYC#d2w`5?VW4=W5R?0m$`sJ^dgA0SF=C0lXJeJK2K80gjb583gCQoDlpGRBg
zU;5y`xNCRv<jrv4?jJgf^YNKT5XQo`eOc=xh4Hm}NgNjtO9-PKI*lm*TAHtQ_nYic
zLJA>mA<^5KX)3S?@_Z@pjluT7v$d7})C2*`dwOo-zOw^zm+I8KhBqZfUsG{kBc5FT
z8Xw_0-_E?zUSso3DDA-~TPwm?QzYDl!1J^MY1^No^LyzlHyaz9>eT0g)EhYPGdRIW
z&55V|IjP)o#Wy9;<|RA%x{_Z1^XQ)O$B6H0Lx|ZN1RucLTx)k?iW%?VV<3{qqjfca
z;4Kzk9&R#ke4pc)3Spjc1=~k1PFU11@UH&)Ia?Z^S*CwmT<y!SpC_P9d5;b?Pyj_}
z40Q3o@VyR%yIS|TiJpXL`@9v!w(cA9<~Uic!}M~$V8Bt6Z*(=w_gWXyZXu=5m6-+a
zzW4edBiNM*Wr0q--^XRTE<>79OAkR2qS*;LBww?#Ti1yDf0oE`nY<YRd@_lmmk9TW
z@%8>Cpcu3ofZFis!Dg|`bi>Qb`i2HEhewM+&@@=Oq??-?L7;_&#nR&9RS!9)0Kohz
zG$=LH0Vq<DITF5qdf-$sA?7&B6*}<D-95|t%iHQbifl=Y6da>cYE-#0f5V(6JHN{@
zGZ_~nNY3OB3B9G3xTHS?mZgw3Y*wa;(tm$VVRO9PI_b$!j-ta%(=$>|4I=Xh^C%iV
z2}OD*lREpW->0U+8E_z#P7@Uv)VBogpuBTMJk0HAxY2e#p-BJEH`eXeiIU<^K%|QW
zfHx@4#YLRn(Wc8hk7Sk*Gj}e=rr0M>=99fJ3-@|6glR?XZ55**84E{-cWL6nu>`rS
zOONN5e*{=6<I80Ww`QCD(y^So%&}TxDxUke;crNP_H-GSCiUfJkwFurG)x`Gr^3so
z;0ux!I<A#%z0}7*dZry#BKmzROurovmjIpYK54@CY+E_X8cSRfQBPq-n1ALGUEBYf
z>lp1;3`Aq1jrRg(Sp_oUcj}0|7BSp#-UFb}jW&_!dluSNp?hFjXRXG@H(M3WY}SI}
zf91hdVbXvA(n4jd*mg{3bLa7VX11yCp2+eE#29vZz;On36#>G>N^1UMP&tM53%9OX
zRn(5n%{9TpfQD1NOI%{g3-@fkCCGq5s5kXUOC0hXccSd6gyd%@yfNKaT2J`}voWXP
z7rhCU4?yL9x$Tc)=YoJ;&)e7Ud0?HYNvi|@gJ~MAqWV2$%%y@8Zx@TQOC7Fh6jQUE
z|BP83t@d|g7_5!=m<xP3JNjh&qrtvm|Hz;pp7SSqVBRR)%oD*f0~$c-7h+ZE7WAea
z)qabl8w7{usY%?cegIzz$`olr`lWZAUQ+Cwqzl32&}3fs{zGz8sNWD@aX5c|MsuQ`
zDL;^TZ)Lt5@Th`##~HmR*U_>7s+(%}!$rdCjsIcI*|9%-mltqY%{u7=nfkSIb>(wp
zw`F7(Kf~wu+5b02@T891M3HZ9jTBP!XUbV#=$s^uJRyT3k9OvT%Sgz{SG=a`9AKBv
z?A68PMe{my;OXh<%r@T=qccOOw94<%Lg>tb5Qwo-BTiOUR!zdsd*c=6PVV*+E2>HS
z8YHy*O9P@-gvNw|gyLBQX9b$+sudL#X%<uMf&TH#a8m}JjU!1($*Yblk1wu3r1R*K
zc#Yl$hf7zxFMgC^?OO)>2I(;eG1+8)vT(%t#f6`1xbE}2<V-QXbG`3*i}eaMuaYry
z-ix^cNCSk0RJyP@;zwmq9eU#{t*$IEtS<J-S882$5?3(a&Ku)1p;ZqW;T+b$e}YL%
zTYsf$lqTUwcj7i%s_Qrs0g*<~VXFmeLh8_L84(+dyW^9waH;9<!oEPK#bi)!kd#fK
z%lAbyol#W}5&v&i8mZK;H=rIu_lInKIzG)n)u}h3*uLzHjUb4QggXZGRJR`d1)Mp4
zr`~;a`|M7^JDc`V%TXNE_Sm<irMwja`G!e50y#qT$3Jl94%AhAU38|Df|I_K{ld0&
z$vv}PZEMIR08#TUkPUZzJI}pCW}*3Gt;u>l=x2~+^dm&8Y=m}b0pIsHJLinSPYJ6|
z+|_$VE_p!MPl+0RX|h-BUM)arPGUX4t=oFjyVf;l<$b1<*N!5P0p<dAv_mpZj7rov
zPI2*M>6n5)H>`V*@@yEefr_-mO_3LD&C=8HKvd1HJy88f+Ht;~6FKEPEt-2xH5hXd
zGCNIwWx%p6PR|^g{BJaN5cBnCkzy+OhLqE9gH;kx!wEKmzgWBjU164M%>?lfYtmKF
zf+<g`6yN^ze0fQDY<j^j;BwV+>26QRhIqBE_IS8bGg+1rpQ&DYxW=0bR;&KOR(Dr#
zT!h*`O9dd?8*&M)IiNb_M|XZcVHsQ<H8^?EL{L>xI3oWiMh;Os<=0J5aK~-5Rdk0L
z9jG6|JRZFVS=%a|OSk1&rbpau_4~9$n+>Ez`j0aHCRAe?P7ku=10Zns)@ZuB95)T9
zQRIU}zeq#mKK&BL5&abq2tz*o7~eBA6jvGi=Ug&hD>I{$9x?^O9eUDx0mrd5b^~Zv
zp{8C+(|&0n5XvIu&my{hdoY$AcFaOH>GHiMtdl7PX75mq@OQuZWSfjpOoV<CIt*pO
znQ<WFh4e!3y+fE&>>(-~8M6M}+l||^$dPxq)HniyMXU<_*;7IwUK@oJAH6@^)?j#n
zq}+Q=arJ6d7sKMa=|*1Tkc3=Pw8!}eHE1Of5B@GER!Odrfs9m5ym;5$HtI%WQos4&
z$e_GIy+lE=yU%2Dc6qX%StF%3_BAh1{b6YOGL(QYnFLBD9V(`adaYRJyJd*4#(;I#
zklZ>XTwZIwoHk!>C!X!e%XA@Jep&JrK~XL0UXCN}$6f#8JH$8abz+uXuQ`@wXQn4w
zq^3nk@Yo&Vcx3hFHYog^X^H<jmm*8mZJY}{nXq8tPos(4*DTx?t9$ZO6WcmVoTK_*
zq(7tetOHndHdHz<AMUIZBg5I3mDXEakpJl;)TrT!0fATK)*~@4m1VWkM$BC)Er}C#
zzjmMzEt@W3GStE<j7ib^T_0H7)BGf)-TS3Sb75@x!$61)41FK=2)Y}-b$=%G;YU=8
zpNcsC17>08EFCLC6+$D%_HQxFQv(a$o7p3ncQU~%tCT*R07+(*%lj9ST*gl~a+^S&
zNClshQ~t-v2q#&FGQxf~!FUa7mBbGt$lX+CzHhxZJZ9u$QINL!3z43EMnc7-N>-Xc
zy6pPW^KRISuZCMvx-kv?_Oowcn0tyP0&|^3I7rVzO(BS)Rl9FTxbJhubV92D_3;0{
z`PmgtW_^rLd3bR&FRX%uZ!v_j^Jz<yy%ihP5w7>VHA8Pw^K@}kr<~{$$W!S0I^)7y
z{QM6wiz3<*>yWiNd0`@nQBp}v9lE5YxVZ&IZq3$4CAz7bulkw0nU2$YW7HNZEyG&%
z8$?I!d-tdKTbcoZJFlz3VdVA_!)O*rAmoU=4>^d<ts239eH`eDpeR%bCAd2|8q=37
z?3rY0#>mP6QgbhD5l#6N9$hkl5`BrLVal#<^*jsaN7bI19lTt+E3sn4_PEEm_IVT-
z8NwqywrvY8dT0dX6@;Kwx#d|dN!~+vgXe;X;w(hoW0$UrpKsRpaOp0e&H^QxmrSgF
zeUcAva$SrXu~({FpMsegCWE{K_7HU2?2s7#4C#K?TPW!ZRDHi`Bgugi;FKjd1sw>P
zBA--yeDBW&gj9NWZ+nZF$7(<~cXF@HPo*QHZu67C;}ll=y_c||gw<4B4C(srtk0_^
z+hkYOn^uOP@(UZw8<D5fb*c9<(!gt)1wY&Fcx9$kjmv7H6sA_s`nJ_afpJ{0=#zyV
z5GDinMOGY4MhW_pih&{?*9F$3U_n?05SMxs7#63*@oxUKuwEUatAf2QtUkacU`$N{
zf<=6IL7H8l)vCCs&_(&5U>qiR`1XKN6{WYmp?IGOmbsWYUC&D^e6F-G>yo!%0lsij
zicbBqvE2bbr))MhH(b3~Sbi3KTARp&AnD#eV)<`b3mfsvN_jIvxceFvkA5WN1v#_S
zrzlawF7!+-V&#g8f})+tXzmEkdWDKG=z@$N$~jsVdV^f!W*y`XZk;e5W^k59oAk)k
z#AKHbOdSX%zqq`Dyj0U^>^8|niX=HQP0~|01P9-KXRZxQ9@RTp`TSMW9l>&AB?i1p
z3~Ldl0Vx@a>jj|5S!=7)5p*H(p8Q~kob{f%f`Y>Noms?4C76h2I|BC}fHZNC$RXl@
zL7c*b-?6I`zuxciVNHp796-wR1Mo>o^*;^NDUQX%Hn@kyN_n*LBN&F27e6t|I@oeW
zFzZ^y{UD-?kWa5p;<?x_Y(1HLpJbFjHwWwwVqaWIVjbqceoK`~C`RFMc5-aIw?O!E
z8))SdxIGQ;GwcA4XV0vYka#a-q4&MC<&`R2<B^T>FhY#BmS!noc(d)-#85reh<~ZE
zi+nbs_DDU$rZqdllliM~Ase&B)G^OaKj{#_5hf5ThC8Ihq**_EABPjLBCNw#UQsFQ
zWw?=73GXxIt&tm{n}Ns2vs2;r!m5Zl;fo@bU0rs6#J+VJbd=o7<Jszb*Ih#G*MVwf
zX-yDkl@OQWv?Q_=(_LdS-lTsVNZRj*6(d#Km}NYeCa&Y5!hULa0!06oDE_2uz%OY@
zFNG9aw~C8!NZH0rO-SdSJ?UijTvqjg_2rv>wIgW-TsZEma<W$j6l3vcmjh4xt;eR;
zy0EQbJuV?*E304yDE5ok_B!*JS6q{0s5+hAX}8H*D;WOR$SONfb@9sMHr(O6U@uD3
zXO;oj@Kh*n5fiM&@qLN8dn(F};#XQujQHE63tR18<fWVlVICHXds{Mn<)bdDLMK;=
z^lQewz+Q-2l*35DGoLAtlj#t=KMh-b%%XBb5tU4+oLQYP334kBJ7cd*Z%@yk{$3&G
zdn^6LK*Np~8&0JwFFqOpi!k#<Ft2oYu4=-+JFFMGw|CMPkzllg$=7SI$g7ln9j!B%
zEx2yp60mOjDq#UYL%SW8%^qPPocGo6$IX4rg5^L$4Dqxl7Bp3W%9D!3e-MJ>wFT~_
z=>1e`Is8ebFCs4S+`Y2oy;$l9C1}RjwJq*NHd~(N3bD9uE3jbt%v2RK$J`0({^Y-P
zx9^}T>F08(79Qp-MAOzg7>7D_cN+K@j=f@({ggb;qjDIEmQ<>JElbRTb0tuF;fkwV
z9^xbP!>vbf0+kQQC>X^u@n+wzLAoo#OI2z~_i|s@wR;^MYT$px&>KQOV9QX?i*M7E
zQz4}G_MT}HnP)jxEcae-*{k8N>XK~E8kQ!M{=ORrtpeS6uXiaPRmXGZda$=}`<U08
z9!z%HI%gDR+x6~&QX^1lsg0ABJgX(oi_9?C+}AxI#`R&ff#fkl^om>v)(BLh>J0O2
zInE7T7o<E81uDt#T{<<;S9GO-0Rm4p5N47fUmSk0_o7*^)nR(&fXZuus!*8`*BbH2
z3|B|8NklMQDy--rTv!Nj|4vt?-F(Tv#uwg9EvN@Os7M<`-MM6ZT7S%XME42QE4n;*
z?1@}$NILf_y(UcJE+^WehkNz|ZAv<1k{&r<UdasGfR-5kh+5U6Ux6D&KY}QgGa9ZY
z{8s9JRD7{(6T!g*|2h_2rr;S(<A{0CJLJOkMl@Xz5A*Aff)8v@zQynVXLH3uGlbcO
z%sASZp12z?(YXj?cTaX^n#V*|K9_$2U%<#J?lyh#f|cq-tWZL{KS$9P8dX~8SXr@k
zRou1ic2Ac8ao<~NgaoK?ZhTtDl_o<kHZzL5<|XnyN+HA4RWt!ervP9$Laneish9j!
zp^qpB<WKpeKDo}dCX=(s^e{i8y<?1$%caL34n63itm+a~sSVqs^u{~dn*M_3FDD|9
zxBJ;0z246_Hl*WVTxlT-zux^BaFwzbaZ)t12<Xi8iXIyk+_!NpfN+IVeFh@1y`8wz
zjsK6mw+xH2joL;5k&tdl0YOT@q9r7xLFo=fnxTglP>_<45G4ia?wFxdT6$;%=>eom
zU|(ZA&-1>2_V;ri`}qEO=<Qr_uRPbe)_unT2R<7m8Jh6CCfhPZEvdt2twjh+3=IlM
zl^W-;eS<0d=QpEHuYG+r>4t`dW^d{6V`ouzebnvAhgkKDCoN3a>_IC{RzaQ)tL(z&
z2u6nc;jcn+BHu(L`cYUKnB=wc)aUuUiO9ftz;ykg)5BKI2V)Y-g4}2_lWrT-=4S`~
z^gJIv(I_IpXNUdiB`V+;x8-QZ#v(h&b)4NzfVZ`2f~=jIXj&j~MVOz`s*1OG5z;w1
zXT2qwa6?19xpuJ{G*(DuRj6kojyA!S=kh#>0(sYj6oW?wZEnk_K@eXyZ(nnyMG8#|
z;~9#jI^REp4HYOdNCyn5V^2Rd!85fig+e;BNDcayvFY!U;`6=J<#u7sd%^oQN$N@3
zXwoC~!)gKHg*W`ZBI;aH!#66()ztg%8#wmY!yQfqXIblv9fX-##}Ma&BCXg$yP`NX
z`l5ANKaNJTT=Q!uurOwP9Jv|KY(|x?=8n@I(P?julbi0h@G1d1@KMQ)Z+)%vSJKx&
zNh4iehg$1@L*{QW9B~mdwa#oKr0B1s#3mbkeO2g3G5Ch&mnF4_zPwpZA5KtQjZ`nx
z<AYrIX^eUuZ9N6}QSJTP7|K-h-K`Jbf-ltVVUJ7<FT0=aTX7mUc+1hO&2e5OR_3`Q
zw%*0Ws+}Jnw4#-}o5`=^?mr^rl5kaG1Vfo;-a)EU$B)j%F7OT9ZF^ZUDm2W~Ct;CL
zMbZ*|(|M(ze%Vn^RLoC(q+^(%1{2iL2e2(iha7z`&)uLiRuD0lxl!2C<BBZAp@(#n
zN+R#Hh}^i8z<67TFHR*sS|ZgziqmV{nbT0edvbSG@|)-mN&N^ei)fEUC9Yb#zR>Qw
zy}ylZD|r*wM>~G&x*vs~0Sv<ALhO@96UC@W@jU(J?Iyw@kv`lRzSY^IN3M2nTDe{e
z+jD5JO@6_eKUxerqA)oTd;9WRxK-|o>;wCeI{^(j+}RtD2QD{;UcMVTHB8?gIS``m
zPK#h%$9_gs!ggKt65r%@hSM|O=p4^P@I9mq5Z?!`!SXMA8!x<N=h#$w`y`Y?e)QiI
zlUlF3IJskSPSS@y5WM>EX5r7kSKmQjCBAz82;$uRl415zd6ZMR<CCg@!`7vS@C(gc
zMAn#CxFvmu7?C~0JNn`2GtgI@|H0tDJlS7a=4TAYs|a1aS4Y>b2T+BK-YEO_i16HN
zr*o_?Q9#J`Fgx|-Gac-pl)EO`qou8)mYk*#G`WS39T<cpeU3Qp#~a0sPBf5f^s+c6
zOn&s!F00ab*mP>TuH)e=O^<M7Q(lg1JFnP2f8L5lN+`npp2tuDuqU(9q=FJe3<_#y
zHB6|uZgIu@A|f!-0pfbs&OfAtKgES-?=Wyl5KKN4ew=j@<GM3N#mf*~5RWbLSq%m3
zt_-N&x9ET1=Dx*@=ayJ1ajh_orw}#{MjK@`(p;-qWDxa~ZfLknEwVT?8zpd9P5XwK
zf{V?J!BOf!0RdSstxH=w=zySdQ(nFe;(<4yC!1|W@_*}u%rzEi6DqRZL<}TasX`oQ
zdv57O*^96ct3b~;98H8<4@Z{n3s9)}-i?TeKu#x$dO^R)I^eXd<!qdWIg+FwOMs?m
z@+S}~K>pSF49LGa<rMcw{#QB4uZD+c-9%nT3*C<=BQr}eo@d&B@Zpu*tmTY3J?pHT
zn4|AqWJUTx<nyce1!Mz$w{|#s(V><S{*Z!?<>r^1XY{Eqk(S(k&2xF%Rrb9FwMUg5
z;j6W~wd}-Xs46mU+!FQwWdEl7g@Av!|CGH}iTutdKhVyLVzmJPQy~0nI>1NQXXg02
zUC{~by8q?<UK!eih8CqQcYv^TZEwZ5R&hBrPz><WrDz_u^h}#R`*{k68fnzI=vohE
zN#$64&d(of9~qIS2h$RrR(K^QG;ZRY7nNj1kmwg*L5#o%SH2v?$Pd5eUXgG!eyjX*
zAkV)2>@OF<Z94}Hj+-_V;_OpaBE3b!JLE(3dFL(|${;)%SNgxf!~3WLW0r|iE&Hd>
z64Kx>AveE&LrS2Ctdj%@4xM8*@w`KAPg_SCS3pbDg}4ktttCa!$<I4mHRA<s6&cLj
z2<8xRjhu(ZG+s?=AXVJvnj6`c0OB<qmacBqZ(s_)s&BpZo~H9UYSd<)Q@!~om%Ol?
zTvIUd4*;;OXb&1N;4<j?O0(k#CqkDaR%VlG({YlmbXskxBhk10eBdRhxW4!w<Gg^3
zHz}c7{yB7R*V7Tu{ZNgd6MAO*22DuL@tkI@E|nr!GH3?l>NN+wqw&ySKYBFP{#2)7
z#-qWsEce3^T$;&ZD()Y1(x}!<@L_OM_5eg(f2x?byDEU)*ZMZJD-dqC=tXKG%T{`2
z2oiLkE>e#Dhv}P1aM5#OA<_IFDz8R8)Q3jc{MBhKRc+))>5Wm>c-|xezDRT_S+xd?
zK&!s8zzXnPhf$B?8+lJEZ-H5XlAT1G+49}g!K;YR0+}kwZ!V4(g<O%d`h7>-Cnz?s
zwUO0O7HeV=bfhgWe9*(RAGY&ol!p#!rug|*U&cQ5y<ELFzV8xa=slG8zU@_b`xDV4
z>XQMlRuQVwuaT?A-Qc7B!~B}YM$>^SE#GGp(^Yi&zT3JxY`4b)wC0k7;bsAO<GD9F
zOa;KFZ}XmH3WJZa41Rifx@RD4PgrPlFA~(@m5RD`Yb5))R4-obPt8oZsGh55o2o+<
z(mQUdrTAnkf5Nj+-Qff5X*0Wn)~~K;wJER9^3U_->_m<}fl5?#Je<pQZAtD$xAwgn
zarfnlos?KUeM0cPx(vO&=dM_ugJesurvIH<`rKk3bASo(Js%0bJVp~eK4+*bTkBo9
zx=lg{1vH_o;w>7eEiS$gwj0EV?J^T@)nbX6`$-NC!S#F5`^lag+Pt5Se9z^p?G4n=
zoc+7G&XchSGR?Zwzlw8m`%e56asi+BQ%GTEtyH%u)@-~xU?VZ0G6IOkg~pY!1GxQ-
zl&5EEpPoE@z4Fu<fsV+0vIvOuaYlLfk;2dD8ui~Ca=9-GdtM-MudJiKg!HnKM_^#b
zo6_~l9q*-Q;+d|DOZ|zdfNral8uxs#--=P0K_tm~)JA%VOM6itRDQBOmGyoY&B9*B
z@trOGq~_Gg_~1FPnWK|?&kR``-=9iwz_8%O1Os!5T{yTFL;pw9CZJ0^VZ7yMC?SDX
zX+I}6f>g^=k_S*v8YoJsfjX^}813C>m>wR1%C%A!PgPY_w<4?|pB6VtOk3RvFD@>Y
zXBi5|<8SLm!?SpF%UBc!N=r*k-T9zMoiTQH_F(vQks+MFw+0G@R=DpxvqqFKZ6Zo<
z4h^(V?Ck6?VPgl$MUqU=?qCB<q1|dE=jqe+W!i}qn<EO_kz{jE!t`zY5rtMdT1^gl
zFc2y?>vYjj;NoQEhP7JZ0}quC%@OeNYYdi8>;e88kl^c3`HRcDPUJ1Y<#%0h?`QRv
z<_{(c)PxCufQBU*Os~z)CSyin-0>Ix35q?RKyx~RFK`23gbKyR^%>f}-ts+$)z%y{
zwXUz<7Z5C15TVffpz!oq4)sA)N4e28Dz0V~(VBI%%tKqEXTj+1=x8pB>o3+bn;7O3
zkG?Q{7j<T!;L`1wuLHv?3$3qT5HtH@sr{L6ao@qlu<=^4-CVz&iwb%l!{IqDVLO=m
z$#MLHTG;p0V)y#01Yq<6Li_Gl@~bjvxFS;%!v*rsM_1UW=g~hr3Tr6Z!cu=mTWQ&M
zr#%X&RNcSq>Zf1;B+U}PACK`vS=8VF2nL!8idvo7XxPd7RUSQV&-%jVJ+E5shfWPn
z^X@3u&qg_=8{lI^U(WE{tGq7^u7;-!X*6cyV-aksj_SEi>;#?6Bc~fY!cBRXhpN=w
zuF|aBw_V@ne+An8wKyhtyL^x=r+YK{mA+D47882i$;A>lC9ShG!`eqk9^*R#-|5iu
z;@Ax%?!-6oaZ&e%CkqRV=yuN95*YNHy^~7rL{*b}pvl*177DJ69cV;t0JtzoSZu=L
zZt6|HHZTBvPtU<_mz+bgV|0!R4Cug5>6DaFbWlK^CI?wnzA_6M@w(^StbkS0aR~bG
zo%DWlztwmU59>PR3Oj^`2_*JIuC)mX!W|3Sa*b~4VAl<US<3G~-2y+3<hT|^BOj{x
zLnF7KdP2jox2J9YJ=R%j!0y=#`gPH_O4jh)Ip=Z6r9$U`LB^>1jB<T@#zw<aY-7%)
z1eQ1hmj5)Bq2jlnjTm>)kz)nAQuM+ujDG2W=aMh(a+%o^&pUj$Iki@JD&;c+a~;W2
zrcH?r3c?P*A)Ycl1BMlsnN1Tff+s3WTd^7r<BeaYq39`ffi>bFU=N5W2##Nm9D86j
z^ugQL#TazQxFHdD^Ykh?Mcse+!Tb1a#@@bq{n}&QVFo?I8eQ9<<1Mvr4*+S0f$lNa
zw-w_e@2mkXKvaG{*G#=v{6P13@DTUeSN0X7*xOdocOrvq0GgAB8Xg=x{P-vxD=u_6
z>gnwM#%3a5Q!{F0-PYxJzI+=W0w$Y&mUpJ){Gb)>Km5HmEC)=9!dEC**La4wf*tps
zG=H?@O35&tf?fg-n*Kzn=4!R^c1bId!{SFLE00<@JCz>6`p~eyi~7!kCs%$9l|f!$
zxzybF-P?YZ_s#X>cnGK6ZvwQ-!O&GFTA>s$p0`JW)3=%V0g4a(IibeV#M$EO<7+yl
za)m;{wh1HY{#&1OoDL>`o@2~x(qT*E0B|-tbvHki6wi&pgb;%fi;+l0_)j@uRD*@q
z*WOg+eELoK@YArw@uKVXMPOJ5elKeJDgCRkQP{i|p1m&`vRfl3<FNakVH}#X;El{!
zb)v77Jo0FFelkFmnxruq_1?Vw7<<3W0;fd(j#t7XAFARRnsr+c=)Tpj)6gw*z5Yq9
zC}!w<Zi{nVXXnZIqOPAm+7}!NPKg)2<!Km`?${4XuCmc2IrxxcUMYE<U;0R$ODt9J
zGWdI!`Vbgu_^yg~iBt2gCxh34tEtI&fvd<8Bf+gXiVcM0^;xlLO~$i2r+Sq={f>dx
ztp~YzctS4wZFe>leoB#+-MI6?SKM`4F2w?b<r_ggWOJ#VBNu2x804)>*2r4$?I^E{
zZUUI&iniVzXp~H&3P?CKRH&8yN0iN&#!ej5v<09J{Pz_#PD|HDiwyNMtLw8nBX8Vy
z0nCpZ-MFz@nUhwf76S}eN)Ay3H^ipDZw{JJPJ5pnZa!zzKiZm6ZV-p@R$!J~`E(_A
zc=|N}g$YFjH!ILs?1nOMFRufruU~n&_oEfyos47;#JdcAh~V7B{_4lTAIqbhcPA37
z!b+z$z-bKeuJE&S&cM5f@hnqUEC{Gj5?n?8G*`)L6!&=+L_hsZZt68HVG*Mr9jH41
zZjV(2Et*6l#m3rNj4Y5}8nyi`M1nez$i&M*<U~woMVmdP$m+bMWW^eyj~#qH-l#aD
z5K$ScklDqI=7?gaLnQsArS>EfawtzhI?-4aZe7y4)i`i@H|a`FPl70RC1frR&>c)H
zP%k)eh-5DrZbmRM`5>FG`HaKx6#u7ClXiO^A2TMDq>t=50DES#C?MS*b(CxKhBA~@
zL5a(>A8p#sPezY!63#WfQDQD3ScArQhkEv6m48rz#xBU*)e3aISc|Q(v)}PM<dEa@
zxp2aQ9$tHeZpl1WXdr3vQtZMPnT%c6zjW4FtWYM)!bzh5J+$7IVX>3X<d&y?K|#&I
z5?c{a5i2)DOfAnG`{v6nT^S99i-DCb-_*H9!9}5!hi7%$b3KavCoi{m8n!!&_qJd~
zQ(b1Mq*LXKqE@@f$6YIDg<LuaA3k3rMp#)4{E5t3GPnInq?bcXiO6lnD#)d8X(k>y
z`NyN~wn*e@)@97ci{$$Q^>?DW&p#xN?-eV%494y)_f~8jU9!pBjM`Y9r_y{Z;|+bK
zxcyK8O964hf7?MzW8m&*A4HX_v&2_Bm$E0D2X}d4>^dPe^U#?WD_6}7*{Jxk4<FCh
zZeLv1j*vjBji!3#HAT<1^oy^iucbKlx-_ur)t4yRPg~-`2O9OPr!e+teww&{&LV+g
z{~T?iVnjp7kwC*B_Cv#Dj1uzKxdtjg$3MP}RoUs&ia-ktea{cpuECWP`E}QZGB;Da
zwo0>8JURCQ&o9Xo#_8@p3n<jD?jLoFsY&Fw)BCc$(fskg#bgg}Ba#^%ux}RCY*-11
zME9SJM>Z5am<4<uL_3wSt+t`7W+=Hqy=Nr>z3_+(8Rz6?-Cln=Eu^NoPl~&;<ZLP4
z*udo*i2k{Og)b7%#K4yAx~lpTa0RuB420q_xSnQb%?UuJEgP{RavGDX$2*JaMo(iu
zr>{qd9_p~kNrjh8!Cdndxi0pHRcGJ=GcGZPZZobN9Hk&;D_7*LAED&dGXq3NC!cDY
z@%}B?Mp5wChhfi#pSz0sb?%Pu4<8s1rTVI~!P8DywX#Nh)7f2PzCN|Pdn2<#m{^q0
z?Jzr{OW%LeDD(@>S<8KYn4Z2mW^-%uq;AN~2>c9~m?d|jp-FZM_&R~RzJ5F;_e+&5
z#zCETK(Z>wXw64l5t+5ztz?$OAxh0F%64rrR}7@5q|hg_U>2mRFyfo_I3k%n3f<w?
z&$K@A=v^HTnib-BB3m#KH(tA)$yZzvkW6I`!<QU<O$rt9o%%Lp#>yw2$TyRDRdJeq
z>B(Arw4}Ajl|iMcE3S*Nlm)AjneKTJ9}>($rLFX;+px0R%=uLh;s!Ixj1U~2JI!A9
z)#KT{I7#Q18QIJ&<aSgBXG(m7_dHd+E;Shsv`J#b^msp?<2GQn;=u*7`(2)AQ9?tn
z?Z2}P4W2$5m?giP^+hT3dUm_ERb_J@o`}o?>+q#3l{;Ek{RbI4#W20F#o6R#YyHz(
zrrG2Uyo{R0+nE+AP8Q$G$~S7mv^Bh*Dm(4YOA!(}h<G+mdMfb7w`mO@5s5yq?{1JI
z&T?a*HGUaK25ptCXUI7?c4E|X%#@$xSQ4)H`Ea}}7$1RLNqkpg<8vm;gYnfT0wdjL
zxZ{9>1gda0dQ?}lJ!bJKS*J9*vSERA1MyjvQTalWP0qEU?vF~SuK2vb-kh?j-?pot
zOIg_`!ht7|e)suDCD8m0<Qd0WAV6$X(B#z))`r=X!7-{k8N7OZwqFy8mEiFl^QJSE
zX9=5J#iLZpHDqHFgiH$#e*jdUKBViBPv+2jUgO!KnuTleRU#JsbhP}o9|#y8A||Wt
zx6P6<XMm`0Nl%Du*DZdZ02A&u8?*rHkPS~WU$y1wF63KFV3o(7IhStP_m?dyuL?nt
z&)+hCt^n{FwKC*Lv_C6bwc%0TMr;R}h<ju<SR3@I59F>5ka~m`s4GhxWH)#VBkPZ5
zZ9;WoG4W*(Ye9A)GBVPf!cUo>l4|sgm+VoA%S~F%tZNVYJaavoxs>UUP8U7-9uu(%
zn84FQp->x7Z6f%7(lGM4Og2-fqR~JyH@L63wpnI$72Ac+__;EP0B7$d;_Hf`!i=xg
z`*BR>Xa*eV<YcQOw@nJU{D%W#*QA@|^Y(9?CTZdt6hx(5pP~zHiJA6v-DNAvX)HF)
z?%{}~%n8m<zf7ex%5fN!xMqvZ_C*6v_3F-c<6c&ifxfVO4$bqTD&15z1M=izQ?^nK
z7REq`{rF&4rhbu?wJN3>F9iWbXh}?4ly_a#L7~d7D1FYOESNJXR0{jyx^;doL-J|E
z!mDBA+MqoGxi*aP?y36bpoGSU3`ER11Ait<9*=$`_Eu?&Oc>1bnIh#&D<ZcBfeadG
z8ji?j(2zWD{m<?zkdT8G6~1c}OgRZJ<FTc~)5pn44h=r>Pn5DHeQ@IUW--+p;GocY
z7pM?OKT>Ql@BDsFrs=@_lO60VhQd^vISOOS{)?PG{50FIrk<n3jR+B_%%e*3T$pxk
zhd^w)xuvK*&_eJc+FW=e(gk8~kD0M#>fd<NJElZ^?q+BG<^|#K3!2c|>fF*YlX1dj
zZ)a$8aM{1Hhm74T*>2ub*w$K@_mSUz$a$*KqEbm8$H4P0KS$P3E_(K5E`u3+iwc#Z
ztwH11-E*H6d*}**wqBh2MZO}3f8pgEtkGt>srLMfAtAG=o7ueh@wD@v>sFFt5|M9u
zs`5v{+1cc~bD3`00+1goPo9=Jn=6&~VED>@Ke@cLBrj?_68k_c*UYQ+MR>;3wFc&L
z&WLOYjp_f7V{yXs*TEC%VwA~~$K)^^&2Hnj{MzJW_>LmfK)sL8Fz?f(Tk{|-^=3>X
z@)-zscup0Wm8tGM+-W0&UY=uT6=#mdV<us0IB;${4~VX$$B@ske$-}vf<J>-*?8rZ
zG~c01T5FU46?N+)4mFj&qa)yAbU-3;L<YTrg<sO2EF!{z$QkslY<_TbOF1Efi}UPY
zW5q1H<G`S<WcZX7EsirkM<SIlpS?z8tLCPe=}KlsI=+6T_0sae&%J@E!s=OAYRT|L
zPKOB~(9bj^J9bcS-O`qkeVli~m#dP_GaC$r*LRwTd_y>C<*#23iK-8n9=?sd(!LKD
zI{BU$5ctx>*Q`mM=QQWsPN2p`JF{LPh2FT~m12mdA=l_vwRMZQbv^^LS%I)dCp8x3
zyOhD6F>hlYetm&G@RngaB?eP%(sDhEd`!|m?3SnKv2h>g%yHhS9;{Actuoq|R8>Kq
z+aM#y?pnq*D1OE@PS?G(L$|B~`KLCp;73oNto8NCE}jVGG&OvWob;d)aO9{ZrG`EE
z^nAWUCp9Hsjzvaf?OCW1(d-IO{1f$S=0^>$4Z<7kF{<aF_1ioGH!@#`7N>U&g_fu*
z<PY&kC1)So++Z=dRVOx<S#v7K$BTT>pM68swWUvB)OA?7AXi^+Aa`IR(I_f9uK?z1
zhe4{rk*ufcdG;BP<fk|InZ%24<l14%9Hl2$_^{&i3bPV*SbJSkVm<Tro+5vcuI&;S
zy*}(sy)xRRj`?Xh=aSLEW{U7E6KzA(mnm4)LW|sN+rM>svnfHfn$WGbyf{md7AAJu
z_Hs|bXQLoh@kc`}&d8F|#={Ge<4Ar|sJ-ul&SYmp%ye2k*~aq*5m~Fv__27sB<{0A
z+KpNJuP^m;g^0$LyU;zdvi1y@Ka{B;PUIAP4CAYj(>sZk4fV-nfuto0JDM?Vm)~C>
z8PH31weJHD(|k(DFT7~rMz$Xixw7R((ut1EO>2Dp@`7zF=dld@F7X6y|C<s#Hs#Y-
zAhh}uWqskpe8!E8y`}ennEEpg3^WI10$1O48ivV|t|Gj~LUlJQxc14E43*(`>G-w@
z(;)g~$n41VI4*7I0&i^Aq-Q9PdS2f8HU~`gUeU|*(<+;B9<~kgZ+w<tFT{IK8d!(2
zls+z~$x~d-7WRye1eN}B&x7?%He`Z;J%sF*lEe8)R*nd+q|9=N{Q50<R`jD*BJm9G
z9z2oE5bLez(9q);6Ak)DqEi6BiV95rkgGENhJe;Up(H`gClV;fc|h`Vw#R9f=W9{V
zeTNN-Y`Kc1o_Kh9+5@XhMOiq3`-SV-izyZOkKRkQ(;=HP42->%ChlTVg=qO8EQpe3
zTcX;b%UNbaBVW|4Zu)l;><r?KARqbq1@yG^FArqa)>7|Vhi*;mji+)9aoy*E;X-W~
z*VQ?aPDLKQGa+>$9frG|GkmZ&ihpx``Ve{2!(XIt(+;8>Z*F*Rhy1zEXqh@*1G<Lq
z*oTEoJdE9?fz;iEv0`Ck($})sYNkGB*$huG<JOPjaO=qOMu=P+Ca+|&oIP0BpB)ab
z4VApDn-@zaE#%YRm^ye7P=<vIuOf1BjaJ8hJ?!#&ZXnp+b1^u)ZL731zFLY<=(}Gh
z3)1CX+kwBv@*IYR_A9=cwE~DvbqB5t?nXhyyiZrZ2fl0Ly@VL)T!>u?d2sCc;f4%H
z<yM77bgz|<R@`a+Kzx7Ia{WrN=-JGzq%rq3AHwn$u5yc1hQ0T$mNU~T#pRdnOdspx
zCyq#<A4wL@enshjL`m+oL_uKkdcJ&{yxAH?=sr*Km-Xe7Nk}G!xFGi2wKGud5<a=r
z#rly#L0utNIw5_=MZYkC<&)XR^@ZG*$;-(w`7zA`ox#jTP6d^N@B3FunHPpgp?a5Z
zsqY_>K^s&J$<t2Vaud@L8pb?25?COxBZDFlgNEx&rJ4Ggtb=9DNWgG@&Rh56RoJJe
z`$-9qDba@x&n*B1bu;qe!s(&%h6bj|I-azoDZ~1G1pzJ3@{Fuy>dnK$pmN)xeGjNl
zd04vWgT+UTW*A0@Q9E~#J_Kw$;zg<%*O3-oEM}$(7*kiKmW0K7ym-MNiWOcV5ad$*
zT;ayMxbb^Fl&0|<%nYck<!=Jh<d6A`1IbcGel@#79+zKcGEfn5vOjdPx1ygc?5ajt
zB+gZ)oSS`qb~LarmqR%PCeEZVr?V8Ytg~FNww+jDJ|cG%a9!6Jk7r3{nbfsNUh3F+
z>I{p0WY_!QE>GbDZqpUsFE|lq`i(E^!}Nx)5&`bExJg3;kFg#{X=sUlty5;QLkn&Z
zR;t<OwiIUka%Lp?IRXr>Jy&yOQ%SPaQ!gnQzTQk_AQ!AKW@UeK6m*p=xep|ZjK@Bv
z-#i?x?%7Oh=G6=0AD8j6?07Bl1#>p>N7iV8zWCDPZ&Akht*`f1`l0EUv6BZaU-f@N
z)28ex?A#`Gu`hz+j*|~A%@4;@8Vj8-O{@0nsanJg0}K)GhM4}n9|;H}F^FA*xkfeA
zV?RRfYQ~~u!Y03wkB_cJ<s4iNx1c6aog<ZpJJN2i^UQS2bDi_}viHXU@d7qwc!`ry
zC$GVwr+l#L4a*tJ>S22vPV)kW^Jq>TGr=r+wtlCBcE&C%?<|XN<CdmO%V^HiR_oEc
zCF}1er3w6Y>ku<dGvi{TdIN$+fVLq>{DqZs6M1~2R@JR@)Qm@8-9+wNbP36K&TGsU
z;E1GN<cUUq8cvO?0yIH2W8ocS`2dU$u#p3Dc<R5>#Gc+>%VB?HIQC9U-S82vSm_g5
zQO^o^jpV_ui)j$g#uLoc4zZ(Y=b8Hpg|s&3?KFcIy%(@OH<9wC^TRf&LDkOiLp^FD
z3}QJWjQlaXifRl#WR}<*FEUIT5lGKLFZm}W2Ez3n3uBNNda)a1lA@rgnUKxg_DWrp
z)jjrEUTn|2##jL|Ffv_n+X*)lvVJ%UdXnIBHN6SaGl<gX>I}ov6i>Cw&6Tw~x2mQf
z&ea2X90{96@GXVf5y?T9&^-0w!UBNnx^a{$EX7u6xTH`)Js(FaI1LEMSAlLa*r|tA
z<#xkae~92{*B+Jr>N5g)J{d%=mC|T@jD)&bT0%fFGmVd#i8;Qvbh58XyjI<AH&>S2
z<MZ<Vcgw51ID^j7F)&uGK|Y14(ZZU%&ur-NTEj_g?I3t7pSN$)toZC6V$t)S+mMPh
zE0iww>sw9u@mLUY7jHn}C<~#}7v{(WZXiHmeo<&|v*aPRelhg&>@xMqTpv;>z|cJ4
zSnOZ<Lc<(lBqlJWg8cBpxVoa5^RPZEM`-3wqOLlVZ)SAXxQ2-lN3k`j$ti1B=G2ot
zY|sUKL_wWp96GBw6MLG!KM|uUHbGH1{z1WuTf1l)oeF=|P+lTLurLi&u|GaW=S)`~
zbQQbDUG9qQYg}E+*(dNBWL9DVs(p$al?m0%v#$~xiWP6g3g>?j>0>*%$`txWom*6t
zEaFx~9#am?J8zgO|M17x!78r-ap;9rSMn|uSHoL8bR6lFrZ0|U1$}W;Q-Rs%#R1Ya
zk7W>nVJzo=aS@adaAJh8Bgr4VALK4p(A9+5nJZH<XHacsx+%u8e;d+Rtml}~9JC5m
zifQUrP!}#^#=LKR!)O0p7+r6Uhv_%~Ngmu#SFi<9sUc@c+Y7-2=AXhYtMlm_%&vHL
z?0Y{g`!T!@4~~$B8;L=?f&-|PgGDely+o~?jl70JDF`paXK~;)Q;=Jj|G{f;FiD>x
zSTV*e2X?mL+tA!>K$D6%{0gb-1#-cxdC6<=O!=LEUJxQ%^@a^-nNk`<V$bZH@z^+M
ziqH=2HGJrQ-Hdv{W<gE6nj5y=`$C6X!Lt({qQh>)(2vXu|1Jd{qLV{2ji<|&T&q{0
z;Cb>%LWwei+_<ZTSteB4?q{-y8w}>R^R;O6>$tU+=!CTvuIj$Y!-5bYaFp8DO(HkE
zt(SV<R}l&rlDxZm6UqE4P6f5{pDT_|Mr+}jaQko|^SWt6-5imzma4x&8UFb$-E-6v
zIJEvj489#kH>Uek*!6GO`e3;Ql!W1E2?zdb|K*?<IB>H$(=jRn4EW{G9&7&knNzpg
zscRHpZiF*YUjY!8L<H{bi!Is2qfvpw<)|&8Ev|Y(Vkn7}V&{JxYnKu@W-q*FGF^T?
zWwy0Yr!*b<xSnQ^?tLlB)tDtVtyX6vWU|MqHh4t@rhLo}nOk`7OLFDKg#P6Mc;EPq
zr~N*k6|n@u@irqd1IF+QciGE<26K9Qd+HRzwX3KtVdC1(TqKI#`+ha?X!#D88oSeh
zDhZTiS{nA|KW8%nXZt>*uSMyDp*_79JX-j)-CDcw`gt=o3F=2Eaq9NIWkWT(7RC<x
zH_Co!jopwzyhx_D|8u>+d}I?blnw(YAhPMKe0l0eDcR<1Vll39_C89W7<)0H$mth`
zSoqaK#e}75tf4`*N9S-UgbX=vvedt}kcA69z_4uV_C_d$e@C@jc{%N9={kk#+fnlC
zRH%)-!MvrKXeV=RxqSAhDngJWz_5+v{=et&lekAJfnZ%MD$CL#{UEq`pLS=`Yb>p=
zvNQ?nIcgcXn5JdBM!kXNfr*Ov8`VxP6l9V}TX+_NssG)B1S2&rBQn}iTUWgRw_@vF
zQv{FtWWV(jt}%5}Z<Iitkz6OMp{~+eA}c%(@B1u?WlBZSi;wY-urWs2ViH5Usy-L6
zG2nP#4$}-@B^RBlE?tnh*4Y~S;R?#6>F;kw9u6Ws4!_}@e|~($l!|Wz|G%22g!+Yw
z!~)h7oQH?g8;7lV2hhzIRBgi*AMgN>8iu=1&EE~1bFELFDls2nXGEsIN$~$CR6)ax
z4gmuH5gJq3M(f|Rg@kCTigoov<g9J$HVO9pB_KENn75p1Wtlm8A^rKE8Va}Y<ke8)
z|0`CEx;WqtUa;xPJ|xvCHGf@cIo_kk*T}-Sr4ro9agPnWQYzwO%Q31d^VY{+c2CB0
ziYZO0T)*Sv{O1~iF}E5oU!q`MRDnXT!qU_T0g7(301oAWe|tk~JKy9VQuAOL-@k_C
zj?XMJym<Tm*tfBbf?g8V1*Zt5Yrov5@yqjM<rA}rdcJ*AF3ohE5vtIVFkvUGfPde+
zoHWM9cyaaf2%(Np3NG`d?znOQ1?wEFkJ<rX?GS{jlQ7o}<m%^0mA1^g7C3F({#0`b
z*2StPBa?I9;Y(f4a?|}WA&<~7x@1h2CJ7FYzi1;zCCm(y^C+H`2?iJ{mE33l*Aq!F
zlUhzy6^dT|+&cuVPP)ynuG)<k8KQ?obwX;FC_%+unwcj=NQ!X_ci(;eg?=q0rV_Rg
zU6aa11~yHa+SG3dxBG!ttaql>w;7738aeTb7<yM~py@wmBII$C>_ldYz@TeLRZOqI
zAWp^BZVd&OUK@|-%7}EL@E$Ntq;H$bi|gzNtToE8Ar1Gk?#m+D$;Ds)SD$G7#|rhQ
zzIVmg0bZh3x-_oMbWLeE4Ik&=`(|&Zs3(}vpAAz9b9*Im9GXLh(vMz)OC`p|u&=)i
zq4)gK%wOR(CFhcwDMkX-ZE0`)&;DwYCDz$R(l?yWLQKvFnM+(wtl`xI4*7a%58put
zU-b(Te__SE_-PXvo+vm9Y^427uSz#6#da7I@>%_rw?zLD69-I<9?17E@1T2q0_t#{
zZ_r3wwtMKK?_n#0->TfU#o$G2fd<m=z9e={GZC|4j@kWe?`XI`+F#@r>I&aD75o@S
z-NFN@dKK~?Ys7L5{l*Jf7sGVq^*tqi3|R29*@f-R(;nWkd$O@|RZFD|K!O!8DSgjA
zhtpi9xYnNQ*?3IUKBeKSFeQejIWBPgbsdaRHn<wwbY~S&26*C+s2aYP`rryXXC=#F
z8hP8~iSl{AnsIN>)H?W1MryU*O9A}q6^#4KJSCClzNW8l)D5c?8uAgES6q@4LmjJG
zm2m#iu{J7nT=-;V<lAETyNNZdB__?cq!8_!hb%K~Sp)iJ^rzt^w^mZjv&Zw_xokXt
z1gxJK&Z<)g7)m%1Rph2ydoPpuz8s;K3}T6xvG|{N?r+BCSE6`GPq=+b4E?qwZ~u`D
zrN1-dN7P(K1vz|TM;csO84k<5JD9+ZaN*uIuI#_%dv8ib_>kReHx0g!fZoDWR2|d>
zl;QX5_D7IOAlhFNky>Hlz@;Y(zb9-wAE<|Xo;&%<(>SjrQvRs$yB=M>KN_Z!v_y4N
zgZTOVv)zqt<V9j}MLw?(m+ZOB;)?*o2kiH;{!t-06X0+#XX~yj(VJ3TS;hY-Ie(Tq
zrgQnOcmmkx38Cz<N?`T_2*DP6aC;LPzho(;+EAq$bqdcx!znnU#F>H_k)+QOKd}7s
zlfm@Bbs!OJ3tzx4rz^iF?gd1ePvI?Q!vd4Fs^5$jY27`c><VQ>^LN4hdA2Q<C}f}J
zLt3Mgz8lgxnLN&4Dy+%HLVh>EP|}Vn=C9vp<o;z?Ko`{n007a+(ZRNHB$O9;NTBj{
zwYW7Exxm-$**Dk&<fmsnS*?9tqd4}?ARlgUB8L>?GA92;*(fFEkdcLNZ5z!<j#;&l
zN62W7hS>66cR#T36KNeHe|4(uDtZVEOo^C&gHQboSg#5|I_+$)i?$;LVaL?qmm+La
zD&2<t6j2PA^5G`Y3L>a{1v|q~MkI4cUaP#jYhvSVH8@-ibIC@exZO8_xvBKbp6R~g
z_$1xABG|D~zD9$ALEruSm`8{gan}d_3JkF`Mss?RN%N~JKtJ+2S&A=|38TE5s(()f
zgeh;~G_`z8NaoJ>Db6Y;N2$J7A;C#D+e&A>ls?rhoLDbEiKk89)%tc^2KAo!>?DRJ
z3RIAU?Z=V$C88|OX0tQS#<qR^pvtp#01A1X06jU$o~R|TdF=hulEAMdjMAzDNDazs
z1xT0x`*gS%DXs^|q?^D{_wR@MVqCWDf?jq#fQsJt;iqPZ%GsE#%#U#fqp0ufsEMp)
z#2^#x8ypvSj^i8RPi;<}3mO=DO3Pu(^R-p9qa)q%yoZ<5y%B|OI``<<#)z3vFV8=7
zs)P(dA{Ucfee9J8U!7_y-nMPc;?_D1Ce_N5b`)X$k+0F!y7?nLkbBP5W3!~`asW1W
zrIAqRc)nyHkdO(@{}M;8<D8;q@L)jH_wvYVetg+(%JBO%5Zp;#|G9C<&vt7~p)Q)y
z0$TzbjVe$7oQc<j!t86r%*benqAu~T1G6yuQ{Oj7a92-16F=Wq@7@O;*|tRDKTBL!
zzcMY^0Ln@=w|<q*Yb=84@vVk4NTRT79tahg(|fjN>*1oOtDn~*?%E>m2*mHF<ZI@?
zKR`|l^T3g1bbacV_ihQ*g8rM>mxQ1&It6zt2E!*h&wKQK@QTKs+FTf~KHE9aiAdI@
zY<5oGnPK75LW;|0``!>cySV&LPk>=X0^&&?(zvE#5#Q=K8{ew94gSepKHBNxQ~~?y
z8Q%*l_~miW`GqW_r2Y1+FPUpdFD-_n>ik!!Jsp4Vpz|XxU$LWn+mAPq8BVhLCr+{x
zJDssrWZmy^<@Ti(Uvdj2Gq&(#V|x4vVxy!mw(E~l_#o4WgP~$$@tA47O6xedgeedN
z=t{EN<glvvX2V>Op@`S{!MGimZ?ZKoND2+L-k7Q`uC$vPWBH<14WNz*YKx?A>2v$j
zQbeL=SFe#f4BAD9*w-Jt-<+-;Zn!wL$p^n%6_QD1s+}fz6?`_}M%fH6uep+6td5GD
z7KnDrB*QHJqzqQT;yK>?6HeV_`C&2NI2O;T+1-Z%=(TdyGGixf=Ne}EMYbR6*Epp0
zrQTqVHq2J(4V(eJ!Q7w;d5v_h1u|7-r&Z^^6T2dIar|5a1Pg^=#tm@C)pTU>HR{F#
zVrK<m1;a^3-a9hZh@}s<lk|*8W=wv0xD0}It)uMrFAo&V3Cx;?<NfQu4V{TEMseM4
zXr=AM5Ec=G?dDXqINAU}njoNSxdH&Ftr0@Y*S*$rvyp(tR0^(c#>i`vZ(}$+QZdA5
z-X9y88E;eufTgLA7AaN(5BJ{Y?__1IQ0(ygoVoy^OEvV~Y3C8u<fyi`o2)1W@X7>G
zK*-I<Z%aU5;Dlo*ef69{U2soPiMq7OpMt%k4URS*rQ0+qEl(cy;p>txx+yaxHOhE&
zC;YlX#z|o625<Gd|M4e`7<sB}y4BE4J+|*d)-s{wrR$^l@ZftPRlxMrx=iF3H%u~k
z|9BM7lg}<HFN#}0W#rxrsoQx>%V!-M;}AM|28QAoa6dfAgt-HQS;wtY(j=3e?9|Os
zW!6P1u<tr<xDt>M@AFd32(gADE#$dsHF$d*S?Ym5@*W|cN7iop{Ak98HeaO*ILEh~
z0b1r{VEGL=PCW5pvn`U}`PuKi>!J3}+$Z>F?+<Wt4yW2e$ds$=8JxZ_S>4OiE>5wV
zt{IfCudqhQBYci#+_Xxc$sD}|GeZmt1xH<I@0j?xZcR@BuFZ20csb3bKJ+L9e0>&~
z6c5w*cMtZNdFqw-1wv0(TsYoX&#YnT%ZrT`t0&h2WKURLPVqLdT5XfXIs+iS-k>-6
zm4Oh;VS&FjL&%K%P62ThB?MaY4)q@AW`VM>D~TiM15i>&gj3(|0Fdgt7Kydeo3aru
z#x}EcwaOpvG=pTmA$FT<eXDjQr5rRMz!X_nkSJzD;sYx+7+rmEqKF$Lfdm?grh(Ux
zBpI)gBmXtoWk?>n6sN5RqFpj*vD01~3AEPZsJsChU^w=1=Z|k^<U<djx4`%vIuJ`?
zC{Sz;Wrl(2OH(SM2P0R^cY%qAeR<%kZp&WX23;TN8chdmmNR6z9bnC!;fjcVVU0If
z;Hiu%8Sbmd^N%;`u_Az9XEtBG1G3ye^k+Ql7^d-I{E3;r05pj<ptsC#-beh=TM+9s
zc-MxBiL4|!BS9N>H5e?FRhKboU#M4E=D64~vCtl7NDnW#CK4jn1qUjl^YPvbgSot;
ztX1LBeDzcuc<&KMwYYyt&n6qA&4w)EML3n}A5S^N>{qgA@F&E4rt}H)`Wym3r3WCF
zLNID(3j-UMKeL-NS(9(|^k{qAJXi77{(ypVp?Y@2jMsMk)_Vj%B{K?`1zWblcpFLp
zKTsdovKctcJ00|@f!Q0gLJPq+%UF=0AX1g8vHyDgz3ynC;>J+O^~D#*|3}A9BfAbj
z%6zu$9%^qcgPIa}X{GDN7>Duj5=nM4G~ui~eJ{m(S3dFHi*8i0e;e2U`COaq23MN`
zdsGRCZ_E1{gq;Lx#F<si%u8|ZJXLL94XBWTgpByA5g>6hta*ed4(!qK$6L;kKmcbA
z$FP0{@`K=V{^QQgBqap$ij3+fwF>o?(ve^yNc=J_?7>2$_=UruV_FrM)>E5k=IU$%
z=5$4`XK959%MgXmfawa7GA+)!jD96O7`GYt0_cPO#Gp~mK-`n=XJG#g%AZL{q!$5e
zS_PRAruvA2pW@Y&{*SgM!FU^!WnF{I9>g14@p^#v44PW)OxIFX2wA`j)-VeMi8EJ)
zJLPW?j^YRM%!UGYP=Shn0Miq|p3twd{m8+M5SS^*r*(*1R}91V*PT->O_w2ZPL98w
z+G%Z|%YN_5pp6=kS=M&`GMwL*{4Th;w*r3O{^C+N$r5^lu7K6y!gmS8fyz_%;YU^s
zw?>!TWcP(j9ulFfW#8QhEdL51<bqUxc8~;I(^3)>D$x4%H^eV6NZo)8Iv;Gfq`k?Q
zsjwO@0EN4!De_y_BVrNLr32seSX+1uSk|zA{RgM}U>A^CpE1JU8JL^{hS7APLG4hU
zRv}GIveTni#q46)ktJaRJ|W|}OvnXSiw{)4Ll#o-Kbbcvoqq+#{sxMl?B^P`(gQ~}
zKRk~pRphB{!y=@!47?VYU4#JO^yV8{NQwBxaqBLq;1Tz-ZeihHH#w~bqs(ycu1F`a
zwD7!xaveI$pi+wu=)YGG11l_SGjaXCYy*i;s;Oc;S1Z6Y859|%dO_6d5CI0LZ2}~>
z6etk_6eCF~Nx?|2QXps(e|)d$T8s*5B6y3XN4%5evIv<(!Np(6%s=4JtzYj$P8gJh
z><}f*Gr{Xqpag*&Qsk{Xn)96-b>af>W5jqwNzdJuj13S6yXeylUIlS*Xng;#m&X_n
z-t1CzxcSFl?;r|+IYb848wD1}9{7^7N}KTnH8Y^qc7R6XN^0L3K@XkvSg9o+ckUPJ
ze02Sxd&&9nV&oVY<A!q8(|e$^3?_QMKdhCf!J){F)GE+1b({04-tcM*CV?bbr234_
zdhaQpf#2V3!Q-u9OL1?hS5#C1O7VSqT(0n%DHTWAK0um*P}HJ;AAwDNo8NCKfHf+m
zWSBb_sCQMmOqlSyEEN66&1)SvDhuFYp#P-+WyIJ=I|t%XRmn1Vw4tbKphY$*#`Cjm
z<g6+Jn9%Ni;f>ql13@=lT7oAsl6?**s_drtHxQ+t!RA%@y^kGi5G85#d;KD~J@FwH
z0O{<+?YahDs+Q%>?cV_mqa6m0h{|7YfsW&j3daIt!x&lmSe8fL-`0HwCK8l4US3QU
z)gS7?m)`3e7>e+PQ~=7~bi?riUcP=cB(&u0aLOLVRoeOpcy<VY!Ptra{2>Ku|Mx8-
z3|$6gv@#Vav~Lu<?<}CEW3X-ORYUA}>-VF*wE6b4R8t}KUPmfu(MoqkekEb=z2+Vv
zmS*cMmVT?07`WIUr5OO^LMa(=GVZ41U}Q(EBAZLt26$K2<>q}(R;3DxnlTkICc{VM
z=xk|Kpu0U$z&-`URs*<za>{wKtPkez8H^;WS^6jxT+RY&i3NzM<UJ0jod;Zf6d3Ye
z&R(T5m`~p#6hW2*7p6u$S41<V(!U7d2WZ?N^zF%S7xxe#n(d24cmD2%6Jss2O`#2#
zh>$HST){3FK)hi4<A8WllOtlNwDVPp2-M@jKqu=w<&`PGAX`<_e;t^dzRM5jjG&XQ
z5(IR!5yI>2M>UXaRJdXo{_y+)T(6*`fXD9lobAN6&*@jJ-mwCUetXVrgWfdWR;1hW
zcQix^?yK}u|A#-u=r-w>)u9l;EgLq(0&J>Fun#Z<Zz=$ntN?_I5b_&$spw4O^$?j9
zrrCI`E=+-H&sZ<PPR&sH4TxH2M?2L;MY<$p4nfNc2P?Ngm}1Y^BB^Wi+k{k1X6`an
z!BvG*cYs0IBHC`YvO~CDoA*VU+k@+-nRxNlLE^%5So}tocw>zgiw)~r4Pyn}SN!Kx
zD>SF5rV8SUCqoPI)Mk6EeVffhQcm7zitWCQ<=wnE6VR`;3klZ+hG(|6C4y6}`PAUS
za7xnX5T*T3!Gc7mtq_4J)hWu!Zo9Cd&7<5bMZ3ei@y+n|`W9iRmWb4U)92ql<Bu&S
z^^<tqqGDBsA_alK94_o)k>vR1U~_656}-BowE?2fdip%eG>rhn7I>Y?Cjo!+g|U2X
z-i?X!Y%<?y|2)(23M+Ll5UeLv#0YwA6js}U$ue3wPtt;iVsq3BAC7LRyQI~u?&9g0
z;kKzEX4LG1lcCe25dZTmfyx(&E4982@WDN(SsGbEXV3~_zK3$E#dTZ6k-uWCT8q*C
z=^1qRHkalqqN#W>c|^bd+~&wg>kRMK)DjX`rCUCj{(bhRQL$yUe0OQ7v%S>0!k*a2
zRTkT+V-?-!hh4i*(VJ_83levc6kMC1Ukpqf7oTNKZI_tOVF%3E98RVOwf?`|k{_l4
zKuZln^8$1oXayZU#aO@ywC)vwQGO~#xofv{GGUyoTG5KzicYy5hMpXyAi8oD=*W9)
z)Zl%B1cMbT)K!Gq>+^9$>Zd?f$kyu(@O4-)vzl_c%ElaE<(`3#(K{|XSlqiP`AKQ;
z{#ban=_OKXW|&yN%je~I;R9*5!iAq~+|pespRKYGZ|o5>-m40$+Hl*UWBbaoH&Ql~
z>f1^7zJx2GI9BxdBA4Uqz{9l6%A&mhpAy>upC7H|&~2;Fn{mak$^OPFy|8t|E0Wt9
zXN%kP{EOLl4`<y}CzM^W^txUxVYC$c#c?>L7oH#JdwmN#RQVCdtu;}Ua~KuJ`*f=Q
z@}NT%IVvMMt+zopjDT*Q^xg8_`@lW3pF3mV<{-h4Vmk#buKvhnTpH=?-OPf7Zcada
zx6gY*mw2D|RQFetIBz>h-c0~iQRT_ESARKxE(PGQ3PgHfzcp5y><IxJKnaL0wp3f}
z8{Q@PcG)<fr=WY{5jK77bN*<Ly_Eq}oD93mGQd1)Jziu7>b3VXS5LDXQ3UHDSF3ky
zKbGv(k<n1u$1gu=+)VPE)*Q>4Rp)8nFYBg9e)03OIqUD*%B={P8M%>C`na<S=S#Uo
za)z}FG7f5Up60JH0XZCu9MA_sCWD|5D##$PIt};P+r#&IF3-E++RVmp+ux_vIK8B`
z_GQ?1a+j)b{hfsn?)X~$DOCU5R7LFNQIG9Z$)c<X{3ijweV;U~&^BblvmcHgqRkZ5
zA;LpU?VdC$eEQ65VK{&D2Zk)TeC&x`?!C=E)<|Jp?T3utvx?m(IOBx-WX>xSb=p%b
ze`Zqj>i1?YE{n3Hp3UZmc}@nQGYs0|z%7T&f9KtDTBr<bMtAJ@trE+TVpz$HQ?Ng2
zGOQXbHg06mec{c!xCQKKdC}X3k`0$JeROKq=*m=$86M#i75Hgv%HjUBzTL&7+pu!T
z0Sj;OWEYLLHB~U=Y=+^3K__lq(th^K;n0%daclwI@7759A0DRJBZ%m$Jy_|Ru-XT+
zuotx0%c2vwB3(6JY?{vM^`(bs`RY|6GkYqm@CSG1HsJ!WNiR*sRJ-vpcNUqNlUPJC
zzqiRuzt6y|?rDHB{?F(dc=@;BCUK%`KQ9UjPh4{eJ~#$$=x_Jx9=r)R`YM!%jc-b&
zIgS52hL=YPP6lmH*)PEXrI5!G3q`=&@>IG3b=4{{c?2l<38Mg_nRrj*wh<zuP#_)4
z+(9U|N<2k%Jom`|GM9VuM;gJ$8BRmf>>~yo145?3qs^?3JfhG}WBnvVw~B9b;=7a*
zuftHyLtTltK4s4|fGb~_6CJxnJfqES?>Kd~J$I<g7_Ry5RcQJY+`jlA^<pmb4wXg?
zH{-Hr?Sm<-PX1~|uG!}vg#C$S<~NvD0+WYc+uQ$eNubhaXFc5GBeO-eU>nYnh8SNy
z6lDz;)WgOzrQ#lXd*!!o{H?wIIikE=Qy3hCMtUITuxoyGIfIrE&=VfU+AfLU%by%f
z&}tBWQf}v$zmf$#-5G}o+aJ}b3pseS#b6r<r@0I^Pz_wmcuk2T4R<BXNN9*@;T1U#
zj3(u3&eC(`?b|lts^(Q7fy(UCe}lMk(;U|imTF&BGfS+M6Uaoaw_G!Pp9Tz7Y~Iw%
zXO&G$)L|IaO*~N74PXUEN9RBoxLoWt&LDPqld3O!(p#a+PSZl-)5}>LZE@b?A>05%
z18?sS0K-Qi0F3&mIHsjT@wZ<_$7xIOYfFZ7j&A4m$%fJS>)^mUc|LAXj(8}v#s(%k
z{&X#RKZOV+(rs4@2Nh~@+l$*#l)^(YV;rs{0yD%@w-`IhCXMx4wmziCVXWmuVl!Xv
zA{^WEbzA-X^lt=vZX7o2s@ysSqkn83+BRNR_RXHcQWgE}G|%p`9k9@AwyHQef^a}5
zru%53*w+?cnqby4((tsFdqS`;%Uk?GRKHiTCYO)Od}>gwh3tdNt_Gq4)p)u61_#(V
zeEUWsT}_*$z^E3Ft0&`THB>^Y|1m;l>$$|hpRgkbghVnF0<ZrqQMBD8wlmFEBnO`S
zK4Zok!Ea0Y4G#ltEz7qtsQ7!!X1t&^`X)CnR=sHv?rGdQmqwpdW_WDWa?jEI8g)bl
z*W>IGmM%JcMtvF^gt64@xNdkn?jDb|hTHC~_$zY5rVjmF_V%>a^C7S5i_GoghtBL7
zuR>4W^A^tSXYsIVwWt^mA5|gsY;KA*wrbfLnxI0IgKj?8wMJM}dQ)29mFjxHP1qj8
zpOzN+nintBIg9tDj-Zj-SzwsK^NZ=$vW_9=-L<JlTf1QsT*gE=4YQC4)dxnzHCE7s
z`npf*8btIQrBB=YOn%)!vt!}Uzg&Q*4DjmZVWqe6{^Y0tOb?T&2sEr&rr&rOTcih*
z^UyE%$%XGrDz`w6=0r9>)>coD1a?nM8aK|^C*)OT(?Ap53Zcg0>GP!=mjxFrxIQtA
z(<RS!3*tSRj!~o#(m$jSTogmYFR>%`ZwBip@c@*{TV$v3;LYVlc*fa|V`MEn{rmxI
zzawW!R5kv4)m^LKku<8dDEce5)}n7{ydv<W-%wP{JGnzw6$dUjMV$3pNpa=DDp6A%
z8UL=b4>I8NPGRcDk_gsTo-(XoKNk$3Cc^?Zzny=dLnX)UCs^$h`=HoX#iaB*<t2><
zDG^h3xcuLm2b%vXz_?{1UGKdOC{cW4Xh!hsT8VAI-gLIn7XN)t5MY8~djz57YakQ0
zmHGe*Qb6BWSis7X5Zai3DHMY@!GjDNX4i2|<MH8NXnw2Z7%E(zMehjuUF86m;t0wr
zR4cUP7Q>*{H}U2U{ja0Q$$^yvrulCX{4!Ih-K5Kc2Svxa?f`lMHS`_d?+Yye*Zn`s
zrwd8oaQy=8{aHaJVdVs-!oQ9Z6$Mttt+i94`9~_GBf*271u>>90fzUG%r?JsY#r2%
zd17Dxmr>Ft1P+&E|0GG7$dt;0w#DW5QJBEWq)t;G9{jdAATbJN01xi#u(itl7m6&s
z1uo)%@Xo))tzZ;*BRKa-Fp=T^KsHuORo=V<+=r5|0`y;df2IsB;ve7>wQJ%Pa5%??
zM~?iWV5fFC!@r71{s92LQT$@}eCXE={4$H4;K9M?$@n*!kkO!e_KSG|Y$LdcF_EU@
zf1m3Ihg<M|y<qG4e+BXX3gT}g_5XE2ltT<MMp0lWGEBwCb$a}PE7ANlfXLI~{$p=}
z8c2U^0-?Uy*&$P9`rG7hztkBG16K-I**p5}(*IabKpH|}pSsX!*FOm@GkAuIx@iB}
zKUV2i^&Fs)bbocnOd4Q3C%<gTpNAx3fik7zC;hX~W3Z43ZHLQ$79vdr3+)_5QT$y9
zn0zXE*#5uf4=nTm1$$Ed--Z5vdv#{gCV(@l1?bC+zwucNVnnsQF-C7R$6tChF&@e#
zKe13k2{Z{aqbW$H1SYhNeJIjMR{zTUKhnvJaurrB`zUe|V<=FBrvZIW{tD1vQ3C?|
z|4{bUaZ#r2`|ye=Ac71bisUdzNGu%#NOulMhagCUw6rKlNe$i7-Q9|$AV@lZfYLF5
zbi;eiKD*DeyPxkL@9$q6@B6;`Jdg7@j*FbG!Ucnt(%!`;)phS@t4DPN;y>dQe)iXE
zF^9(=(BC~U2RnX?X2&d=PJD3QwV<*0{yy8maTMhPzR|X78r<ly_KmZLGUf~QO9F5_
z-t`9zjuiU^psh5kdHZfs0Jk|Y)8gshmnL|L34PRl$5!=_Xtjo_$T(mkn`4-4!XCQ;
zH)hARjqS)-G%IFXy@GPvU$|HKoVmfze|_2nn$9|acn3D0T|eCzD~x$y14>B{nn#H1
z=Kv$l+jH9OW7G%bS$bY2rL(w(Qw&-PiUNSn+@`cr<+P*#{~17H3)B^EgMuq&r{1j8
zOo*IdL)Y;qhd_{)f4d#{3IINDVC&gd^F*Vw1pr;k4pxUh-Q_T%tZTn+S~;QCt;vE0
zc$PH~hvF_drvJ&~__LG2>J@-t+65})H4u6kfqe{DIn906LvZ#c$2ErEB8AoqD38cP
z2myZl9MCx`5{x|i07y#}LU*{a=Z`~O0c;y#H5F$kD;cFqNnCuM`<58SAas!R?)8>~
z34Ikaz=r4uX&9JNeSBdCDh&0YwuBlKIr=_89ocupfG%s<Rq*;5NdEm86y75-;_Soa
z6bC&}>)9;KPp<+1un+_cnQZey1l*Ks$dUk%3jfDP`Kuj7f>`owXt(WIJ5Z*nsU8&S
ze?E}-voj7`OGO{pJ(dDD{KNI)^5vT1;MT~Bej&Yp8UF!dWt9BDTE`7(uUWTgyTbwQ
zA=F148uVSh7eB->)nJUG9Z)mAme53Ddt5h3pncHA^S8|rGuO<-XbS^pY|2}oe>3tb
z4J!e2BEts^>LIum8y>PU4eZC2)>4bkV3qD`xd}~UO~9))_r8A(sHs2FE=yIcjcL%(
z;L!rb2m_Z%K!4nVdb78mJ8jIhw=1@v{rYrgb6A$97F;a@&wV>FR;W=5N;!NUyJi}v
zMnKb*_Ec^CC$QWxpl}#U7n%9aa3A!css=^b=?)e&J0?vJWu*eTyFvdOxXh#kOvFO~
zI!}^m8$^$ULy=?ci3S)J!I!VHE2W?Y_?D2$8YF{l#WVSG(xub+^3NGNR7&37p|J;;
z{ToGHO`b8JM>9i8A=v?VUQZ|aw&4Wacrdp)%KOb=0opoMYh!^C*oS2i_p>}H(gE&(
zhl<lm<$eeGOnvNzO(-BSv&EF~hlA&Qj?x~;Mq4N_j~$Wm7I4D6RWYKs%v`~M5diyd
z9je*7<hgXSH!c!y02)n7@u5-S)M?vbVe;VXPNYCs*r5L`aJp5_Gq7*JIJ}*IFzdCg
z8$?L;{W8oQC`%}}Lr55#PL6BCT9;$_9gO&&zX(W!9nZMWZ4Nd~<lt(6a&0Q80X&0=
zddl!F2wy)aH}<RpSld>0W{p8Da4rz~_;4`v#n>4r*x~CIo#4X1tN^u!6QowC{&bHn
zh!WY+SuVZNimvLHnCuAfmMN?d!Uq>3nM(|2{4aa?b8~v7ivXfmTs*AYG6hZf4kngC
zd~*2$Fr5eGNkZQ7+DxD@%$dQ!oF}!`<AehANNQr&)1OzM@Z>(1@bzm3Lt^afv-vO%
zp_5F~R=}g0l&2Yw)-W3aGh=gBeQGJ3f7TLIIxE{RPaiF90EY(GU3n+lmBBQ7dxf7d
zX3?)GAN!vBBH3CcQ}s}w4}1<6V^oF28BDT#XJ0N594W2^AQ^zBE?jR8qdw2cfo1=p
zJ?Hm3$Ue1C&VYJok|+nBSF@lybrG_rY8Q%X7;Zvr6|)DkAmhh^3#37zZgrmgT`S{G
zjFQoQ4S?$PV22bGq+bT?iYg2Yy!lM#!p6>Ms^D&BH^z~n*gelD`j>iVn$#4g^wItG
zJIo6v(MeyuoU|_3*9+>A^bB?y9hxoU`OB@q?uW4$)&?8tY&2^1j0icDb5i9Jq-O{I
zu|qEu=KQW&V`7Fxj#7(T3w*Ri;5%Bk3J=A3XJK{>Z7>Z`Q-Ng+<_APcRRyFBzS3C*
z3v&G(Q>gOZH=U2SJGHQsv-DQ<Muv_!97{B31f7>rK=Ww(>&HiX2T3bpH&0{U2fTT(
zT>#0x5G!p0&Yj=F%iD1<mpCbW!#lbCuZ;CSV~&ax8@UCi!DEzvZ%G}(Ade+~r|%9g
zN9F4ckCSBgHDLR&&djAuPI`>b3PY3LKfsVJBrb`sgS)Yu=VeVFX##l`Z-{WM*H&Xs
z2tLPv+vb!xxPQtiS~k<b1;UCRe0=5Yo>H1M;@z3N%yca~SW?=_ieTONG1|sZ&lrGY
znDV0w#F$t+v<3N8Bj#(&j}tJ13?A!3+=v~Jz8HSXu%orFXy|$}+`TM}DVAs?7Y8YY
zLGZa`w=E+8Z%fYmaMEJt%-4@$3YN^ZF^3O#yh|eeCRZgSt}{+xq%9p3w%DBz63WAY
zpz!II=NI=<cd4t?NR5iOPdnUoF<Q2%SLA@<qYGf~G*mO}<jJ-DaJ9O_lku*>Oepii
z^+FqzmFU#AL)%F=r`jGujaVJ2ZXsEv;~cdQ%qZ((My-+ap;}{}w@8^P=FWuP6!|c6
zGRp(47w={ycAiwzVC10si&ANkVkSgdy&4ceE!H_GXeTr%7t3TU&X>1eu3H~7*hr!n
zHU_KM|8bbp@<ZhGO!Ap%`h=t}<b(^Eoz^fUtndf?LYbo#CXz=hKWJvjJo0+8_XE;K
z5DeF0jovX|)z-5T>`t3HU$vF37N`XpR*b2|#vm9O)?09+6QiOKkc$DmiVBUXtj_5i
z`Ir*<n1{^evtYWgL*<89g!*KqbI0qk(-E#18X-AZ?!Ku!ajrNEG3%oa6?Fq%>#-LD
z`(+MDt<Ym&2$69%GLox!G!5TT+v`w~P*T@zllKlu5O_Y(PVN?Jzc=hz2mn<<pU>qv
znNn#1xre;5%YkVC57R1HH5#U5n1L~j*BD7_p7zRzDB_v>7^gCPBCS2_BvDN~d_>Ql
zIl!!gl7FgbUub-=oiZ3QQQR88N?=qD-7)f5B0DNLls8o#TIF6ynaF9?+@Ar&fV7I3
z9L=VZ<VeK01v;2M;sqr7lyY5IPH(;B)MZj_ViCMO@u2LUB2<x8;Wr%qT=hp&6B(6i
zr>Zp;L~bE<&jv8EkpH49M6ZBI6Nvye6pqqT@-LZYhWVx4<b9c(l-H1<?8tXf!Mk6=
zQuhO9FkC^<$1&a-G{iSV)3l5ef)P!2o`4i1rLOrA$SqBgM4EdY>^iJw+9;s%oy)y;
zW-0L4fu<{;i7)radH`5^QNK`tZNO4VyB_|`*EIQ}IQ(=H@XK`FmXj=m?myHe`A)jT
zzz3{u6$|67i~RC-##$<3)HG(q=Q<4_5qh%EFBC(W875?h3L|US)6}MF4%MPxA!Dt5
zza(7*&IcO4V5u=f9)8Mb9P3btekX)O{6;U*tlCVcqCzoJ@%i5H(VM^-jDFiJC31Fe
z+sjJCW`b2aTV+K*BYRW@{w3GTW-B=(OJO{yEx&hsOj+G!>JeV^)0+x@T85%m_y3^K
zt(PB4?P(~A<iDH98wwmmW{Hk_?yvsLQX^wN1}wSFqavs!1Lcz-PTdqlU9C!=CB=Wi
zpFjH?_Rv3v)GfpBvN0@OV?Qyg+gqblKzg5PL>4MuJGC!a3f(%FCNzr8Wki>?UwQ&Q
z!S`#-m4%>n0cq{$P=OJJwQ_~McW)~+uoD&|tvi^B2l@NBq`l-RnAs3EznF`VF31>S
zQ%`NzLD-XIUYlj3BlS0+n?((EJAN0?Q;SwbW`-V*7+D#}^GGW)v-Hb;@-SKHDqhY$
z(o|H<lHgr&;RI?$FHT_LGD?sgDrPfTm8)NUsKDAKMXqNyQN@BLC1V<<q+DCIX#sRR
zNW#E!_BIYa9M62*CB_>+enNDYOMpQO0=zzuwW8sAR&z)XA^1l~eyDAXcg*>?$7eaz
zT6O+VF)Sr{g3*diu*v7_@Pa*D&cWw$1TUAmcn!^>r@GSBFMU<U&N@s`d6^p2!8)8S
zj$)XK;57cn{_z_Y4zZkqN&c#!baSChJT%~SioL4lM-O?H+&Y#n)o6QSk<jEl=4`Of
z&xjQQPlpwa8=!!Mf7i{}@1nN&B$o*$9kK?!ElpD>Es2sBuo6}9d9ojC?IkZDZ)qiB
zI=>Nb<x9+pv}nqaP*+q)h-(=&zgE=GNcX<uLgAU_X-;SUJ%#x-sJ~kBta?rwB7w{`
zPtS)!pLgDc^X<q3GAf3h?0vZ}x>ZlkvV^P^p2aD}NX1sDW2dtRs8QrEC+l8WffLA6
zUIN6veGd&^?)1_}HVAnjlZ0s2^$oUKkBkyLi#c6?1|mu{>H!BY_suCjDbL%i5tz=C
zW>sQ@v%203PlMMEk5wG*c)O%Pmo(ujEey&GadErF)y#sALh`whr|~)=Jn|gl38PI6
zT@1%H#!WiXSlWfF$a!LSD^A>q3XO2#&GNx`SluNnbIFnGA#4ve%)rTOuLo?OM!8SD
zi|(XBLY(|2!`5%EL+_1eX^vWmVIg?;k+}dP1bQc&e@yYH6g*g3#h$tg@uN$pkRJqx
z9UuGe;D!BAq=cbi&VckIO2sMQBwJXEq+-e2PRaTd`<!Jhv&jq@jC^K|bO%9BC+Yn>
zCVspde$R^2qhF2OASu)^ee$N?m@<0!;}aL8)HKG+N-7lk9%c$-@rluizO5b*Yu&y^
zj3a%gNIpv5;H1fFT~b0tnOMpu?rDrBA~v=&y!+M@6tk1caH!}P%?n`VmB-?G1PD0p
zSUulv?7NP`gr4-McBmv+t=1uU$De7$#tRgikF~ESui09bzv(KT-GjtZNJ2u8QbPD`
z%u}l5%yPshay365ym@of*<bY3vE(vVW}SZO++(+XA+&fgwOvYlMZ+tv%|(R)J+hF6
zufk;`L@wkv#lM-bn1~&(Dm{LNj8S;Nrk)khyf0~C0y0bHgLAMuo_87*_6W%~rGBcv
zNkGw1ZBS=BNoqnq@BtqlVw-z@YN>J6c8GwyXu6NuZXQgJbWv)R*$jkLW1GIvz%D-3
zlFAv$zp#KF=UGZ8DVRN4=hjVz;vWcpT?zhA&GK)1e~3KoT7aQjo%rM4azIrYuhGzq
z3Xld{`h{X0a}j&7x?ocp;+g#7QqEc(BKIQqTkPy3=GdmqndWbJEMFvf*Fx{Id4AwV
zlAq>|4U8%CqDdi4Dio5=YEwBQsuu*=$QWeaecth;(LH2z5hP!<GY90QbS69Cu*lO`
zd_MsLi(~Ld{LE#oFZdBhrPfqQ79LqzuBV--j~?&$SZGl-^6qxj-I#94P%aJ9%=M!a
zUsq8ww&YXdz#rcZjAWFzL!%Fs&kBO2{YEPTk`JMe&WMELPe;3mMVEw&ZN8ySbGezj
zO}A4P3(czRaR=o!a&o{ft0iy2n?@C~2E+ruGaB}cH-n1J6yd}F)KdQ2KYvsR3oF)e
zj(~KxJUYLCl58{5O+(0d-7@QkAQKl0&~S%#A`SWqn7I_y^(XB<cBt&~XerkdK59~{
z-7=klnJ{p0TLW{0xoy60Izxi7I~E`l9*I0i!jFHM+4A80U1zbBGjJa?q$qJ{I+s`(
z|Mq;qx_#$ZxPzVAJy7t!wYvXAvb_`lOkpts21u=0vE0(EH}bKb6Nso<B#5=(!lh@{
zvhZmf6J`#68J%Lgq!efUhEK{AMX#iwobc2T*rzFIW(sM}bksnVoq2TL8ON6-#A=F&
z{~xClcuA3uhEP7@v1`aHRdOX88?wsm*xuC8fKFk)%CuSp5dEmu4st0#!Z%s(i=^YO
zRu_@3y~tzl89jZy^134ITbhZ{e>?F%luHQ=PdtrG6$2IpdjagHVGjE6Z?Z=WctooK
z{;mYs!9*thophiuk=kmS_pb+yU}{Ehs&_z|*7yM7jL>{c82^!!HmVqRRZD*+^nrub
zPLbgX$9nIR2OXyW0tjFb{|0h5a)THCYg)q~PUah(md$SDzz}vpI{e)T<cu~TUat$f
zJl!ZS1s*MRt{Yn4jD_nkNfV&;n*%l+qB9>bzRYY!azcO&-gh9Eq1oAi^1iZPX9Z|b
za}v+}lyzKR5Utz$6dm1Dc%)xfQfoEZ?Rkj4|9QdjN(o$XWnkC>=;Phv1`s#LS}LWR
z{|v^&>To-2EA*?Mhf{f>EA35yvtJRQg2VO0-gPPhB&M2}K|vs797r>5?>G)ilMAkK
zn)OE69zoW~vzgJeK1YT?(bA6w9RC^;RbkE%#W0<#sZmUHWAyT8U~o93Kf>?w#Dbhm
z=udb^BKdQ)EVUKDnes&dPDUp}vwcUNB?B<mG0K2qfK+@}A~zxD*Z(N>Ck+1AkeH>2
z8Q?c#T-Tah)=U9?JOTr^I5IeP(4jSLiZE6<E^CNlz+SE6=N#dHqN`NynYhzdn*rXZ
z2?hxQ3aUfU)fN{~Qmf&Br1Twd>_ss&`T&{t`u{rSiGq3j8_3)gcr^mzvR(q}uLXuE
zh7t8l0P1K|5_+n(Ki&7#$_of0Hv!^P4|-?{^neLS>Ro(nC#$-Ew*TpN%U%Q-o8A}(
z>9PTGRE#;u)>(PmY0;UuK~)9F?cBiFO#?u1+gD6g5w$k?^Hb0?*d~5-BaB1eE0!})
z$yo)4k#o33DrJby<l-N$jrLre0yrzR7XuWLc!6-O1|!ChW|B88!Z2ma8kZ@0<kW+K
z4S?H4n(Krvd$`VC3&xjYipO1gu3(5U!+QiaFJT41D<G1XIgdSL)~f&R6ejZP!}WN6
zhgnMJFDe0P0wF0AdBKc34FF!W0~b=cjX%+z1nlqcMt9i_P`(!j+RZ>BY_Pb@JtX|>
z--e0@y|L>rzZtq?_)Ciud*Jv)0maYaAtP{dbld47Z{8W31D0GfXPy?|7_xsH_Ze=|
zfqmq&dG)}d#RQ;ZE@zq5$DmiI1Idk62Tg&Rhie8aZ4?j}n`2*ubDDo9t^bz%W~7U3
z34o7&P1Tu0y#T&TT|4msxu=0~D0)8whjUaIka|;0JP*OpeD^-Tob%i$RVjI-+yYYF
zJP~mD=;IOq$vs@#8q&2k$>+w%jgptMhTLURMXn!kyruE}<xk~2z`4ck1_15GgXf1H
zL!>Tz)c3fbzf%mE1N=WZucUCgs`FbN?ESqj|5FM5@01n$4hhybVcel(rtAAXu($9p
zE&v^!%b2>XF5uG|*l}|pxU!i&hFiVd8K1)(Fd`%J+<cl#23{C3<|fcq&jY8&Xhrsd
z!FcHGUjn7g&_`u2+S|^y;2Sf|^?;MvCj!`;^0yUU$}JoCo-7@L3wq&JdlUK&f{oEy
zG1#%J9>5KJrk()`OB@q&-?9y&bPC209z%Y^yJo0DcYlA%rd&`kyUw)h79$_SaI8X+
zpe4Jfti&&{=hXrMT!RJR6j@`mOF&|&TWQ*rs&hN{20px0ej|E+M2G>WOZT;6)h<Xd
z`XJv*ZZ!jPvQQEWo<|Rrd!pIMt83Q{0-`6@Z06Uzgm94XY?*K_X?}&G(`wjS-hmvg
zd<A&&KxM2!v1TFi0)XHuTSe??0c$%wG!6i_uAqXuHH=yE;t92C0KRZfHjp~lQ)2+v
zm{uWfVwEY`$`K>EyA~5L^GMOt)mRbDtD=;Lx&=NHQ{dj9<-soC3OeKQ1%1vk(*m~C
zt(LINfIO7&!I*{{K#j}<7%IQ6AS^VwbeMlCT#bv$9m`88cq>pBJEW<H7;>Ot6kR4j
z0MBRIiQ5c5j(dqUY3070sNzk9<FyUsPN?O;>fV;Q)AN$*Q=c?{aLKelp9fIDSsWY$
z3j==sKiw%X`54g?EXPM^m@rHS_6;UQ`Z1U07n$@EfLlh<t_8isgW(BC%bYMVN-r@P
zDme|{6S^`*XUj1gV$+!vFi}f$cofnD*G76gQdaa><gIN*s8X8H_?JxpgSkq`Wzm+7
z@ksknxuHzhJyqr<zu-5oq<amLt!T%fhP}U)8*gV7NRYW1nKH4cR%u8mQjWox7?fe&
zFmY}h5K1WRE%rT|1042S<njX;k*oAs;owu1hUXxTeXZ=3n{0hqr}CI)Qyu6r(u7BK
ziDg3s@I>(Fc{l9<KJ`7_ro6@>^bm=PgN;{DWdJa(;o`|2vD%7cmCT9;d5|2Iwd(k5
z4b$<+!Jy9g_6AAylu%R*_FxI1)af}E-Oqnmm{;2ZI$4EBLBNKi*>z+2H1<{h>Uq%}
z=>QfA^%3QSd^x>PKPCxbR|2@7+nXD;qRmaxPn_d$V=G1*C)(u3ZYY)w2q>~EpyWy0
zfd@{6zje==twM#uy!pO|Hjfc3c`<jfez8HyLzw5nC)-lbhu!k8v*oX1LR2F5JOZ^j
z+d7RG0q)P$kE9l%)XfzsdgX@+n-*1=l&w;T?}d6J#-i<0pRi=vKYOy626f_dn6(9Z
zV5Mq!?z#L2W~hRb*suhW0w6V6i`HWWKcQG}%(DXe0GPjFk5rn04Z-MM2WBQvhsGnU
z^QVCHXl32Z$~$-dEfgJ_m*305i`Ne}%rQsJ^~@<?F>R<BV`jv}@9k-lU%*}&ds~e(
z&Ow3U$$om)No-aUNg06l=8NRy`|jaA0LTTp)_Cec8si0fnF50rX<8QfI4mEw``Y0&
z_7^b+o^mdnrARH>sf~GE1@UAXtUwfVGRJY!7%v)>W(*Sa*aQo>bE*pp;2g(XLPJKV
z8({`C$`937q;s>bU^f_g#<+yqeT7R^SESEa<e3ZaDgYW>Y*Cq|8VAaUMhJe|BCk^9
zAl+GLHB>`$`mUJSnwx&`v@IYl%gpCi$w&%=8N+AT0M}_tP*omUG}|?hf-Px=vuYO*
zPyQ$A`EPO0n>bj}WQE>uU$Qj;(veT|i{UQN6RpEEnUn#<sX#1%H1agYl2X?RZ2~8I
zJHopB#?eYH2qJ2gs-&%=fe{sH2LNZ*d@a&F@?yjbH#+2T<?VAL^bpBK0pSQJLo<MU
zH1zUSuQaSTfvgP)RFj=1pi*(D7!qfuNs*OI?)FjLe3Hx!DK$kREyN-+#xF06SB+6x
zHaIRgb@D#y7?fr9#EU-_KK2X%%cw((_lI{H|7Ms}9{Hzr$%FM+`$D<jL!g^aD&Fla
zSQ@*Cjx@0efJE1zmTxoPH=$v7m%rK>{Rfx?-o+)LgEDvXy+{<5)&VJN?^@r+n6nZa
z+M9F%tny9h?>;0rY4^wb0XFE(aHM&pk~+Sx+Z$XBl{!emeNM|<Iu&DtD$Ljy-OiLk
zKCip8=3`83m?2Jck}%`jaR55`zbd2u9aU)D&fJPUs|X~8g86~6x>D#C@`ZY7!@J~8
z|EyJ)!dJeDS{inY(jBWf-eg_c#vcnK6zDaj(xF|Nk&43>q3As)*(wzY_S%Y=ui)%9
zP*~5%NI=*!kn^gn=Ow>H5)}n8dKAPbRcP=;FB4H(<rYZh-oN>+A`GlsolqqbuT6gk
zy^KQ{Q~BG@9iTr|78K*<h;Dpah^ZN-W?ol(J&_*?v^8JYB_1`Xq=1RU!9e9RO076i
z*p>;ck8LZgUBe>f&1F1W73wPhsna@pkxFJxMot_o1y*)wNZbfS#rM;f(75pMnBkN0
zS4K6bu{yaEEM0uD@p`C}p*+!NO|;T%rLj7OshPMkG?ObD-?<l9qZ7eDx@h{OAKdR<
z-7CAKA8z3xyrh&hO)c3Esp!AOmj0Im!GCfNTA~c>c@-3MlM>?dx7J(Tc@ig0Gu#ak
zPh@oB6pCt8FMemcmcPnj+^+Ik8A}X;2-QNwD5;&Mvm>GcG^8>R!VEev514)IVNtBY
zcntnsUJ4taM=e`U$_j*ulFH}kIX30cx@wg2jJF|KcU`}5k+>;jK9P#<jK&UGX1J>`
z10cFlVW87C>XC?60jlhDlH^c{m`)9|oCL+mr6M_2!}QF3CBwXE-MuWiVLhZ6u~?B{
zeynK-oF<b=Fwc^`@9AbCxw+`BdID2{TbgxaVcB~C_+1pTr(wrt(SmqL-2Eh=t?Y=b
zi^)GJzGAO%Sb=}sBVJg8rl^Z4DX&d)Te-#MZcXHTVeIx88@z+PdhZb(vq?!9r{;!I
z2BgnAMA~o{sk>LD6zxi!-83eh!7Tl|!uvw%_xBX^ibdZK+W%s!MDASPwVe4{Y{50p
zdMzWmA||STb&uVuyj>%95dGMp@LR#@D=?vMPI2UFKH5cOJrO=gav09)@;!syFRs4(
z*-xtXomE3y`Gsb<%ta6G|B-O|H;}j)eaLZd46x50yOWh^koaB{<_Mehj){#)RUcJO
zMk=o>Dnn$DLJA8p-lL_F#KD(dNHrwU=|^L$Sfm%RFaTy`J&jyHduuEr5hSC{nTVkc
zC)^pHLV(0W8(h=_lF3z!JmpvljCuV?N|v)~<}lKqMtSNSm5}jVBBZo+=vNY)FM_ej
z<{uMOm<ZIPt&z%Y@*_-CU+3bJNy>^g^IYWum^|65A4b!QlN)?fbCQ{t=Dmu5Zwa{X
z_#ZR3PAMAqv4rU6YAJRod^mA_0Td`r_*Ca@H@Lrf@0T4u*|wW&^J#bfv9iH|zr%li
zim!Zr55xp=yep)Eg}7WHTz=19{Ay~g*h_e{>eXhXzYbPJ1L1ux*yvudGoxc6k4~v9
zWi9o1tHu`@!!o4q=t&&*QG!*$Y?gf|fzh)Uw~9M+p+8}j<1p@n$(;i-?)lWa*E5KV
z$J}OOQzlnL1e~}^YfqZueJ_ajgr<`*{&E`Zhq$wh#?KJLiP23cXtj^rg%41jh3p9b
z1J3)+|CpZ0y@Op-N<msfQ`J-13W?+-rXV&|@Q-VW=E1cu+IiTRyP7uUT1I-3Gp$A@
zm}4ox>-V5=Juugv!Up&Wv0E#2e9BkL<Cit=VIQ;Pnu~<`@I*jk&zJBrofI!p8_dRF
zr6UxQ2z6&UhqRwRDAuWME*08i6XR0%$lU@uq=0ewux3CydvO#jc2#l4rSZ*3qHCUu
z{)-oBb^G1J@cH)za=exCL?jyWCb<G<4>R+tb3Foz%@4q3V!Rf3E&Iuc&I|MVp<|<6
zkjq`%Qgtph3mKc+ONj6vj#Zxok1yVdZX-X0;>HH!Yd3>rF75qo704a!xyel&lLSMj
z7Mwq;$A8vLXnk>EC7`;Lll5lArpPeAA2Q#%Hjjn(P+D5Lc`aT!_m;(2EH#DqbsL_Q
zd*LF7;JABVX&p+6VT6PsV;4hRHDaywqJ4dhLUS~WUS#q5OpxjpwLj<>CJkZySbRnM
zes%h#Rj8QOSRtar>i>v{h_z4*54Wycl+v8qfM6yI=}lo<Rl)|DfZPFfkE~Foutnv(
zkP7nX^z8Mm6U&|#?{Y0gq>0zDkT0!qn_o^5EdR}T>ksI~3lM)M!QEc<fVQ!Tz;RBV
zrI?UkLJr2Zv8u;wg*vl~f8eT?7t7Gk<qtwqr+uNTI-uxa*GIj?`xkm)07$}J<8Hs-
z$E5Fq^zSuwW+a4dCQ4FWJE#BM5+ZH@`20oh`Hz!;hUecLs{V-2f%LK2lEgw7^E?P(
ztA^1U{N07^kKZtW-!S<<zlZtFFi=8=-8Aq1v)BUu^2ck5bir?WcXQn_zfos+KjjMa
z)EIVwjH|&-jDgc%5zZgz(=X79z(@uW3;-95;r0&Hzry?LQGfoj7b8uMfQ(>dxc<ig
zt_sU&efscUZL9+F#J^?`Eg>EgSsg|K4{m$u{)tW`{^wf(30(s4tZp&<K@5tb1w(>Y
zrLIKsuO7aMAuHa&G(#THvJ#|H`Vjy8cmvEkZR*nl0VTUnPOPv3D1d7~-UwUR)gk%k
zEBy_`!EYbcB}oHdt`WmztVi=~{BsD9N#M^(?g_P^Ga6ri00pHIfautl;x-#pF0m5-
z<Ll&{Krh4Qp2mX}1AFWb;A<4%VF47x<Pg9Dc{!b;z{Y-}P=gU8lC8BIhMtI5NMM@s
zw{DXMosa-~&<CtU{J$$cJwM)=0M~8z$1p|$vD+6?M@GjZkPPYcAsOrdSl38S_~?I3
z@jo6D^$OE-4OU9`!DGd&vA}<;#rQ&4B|P@?oo;l_Cu7z53T#rPU8s66c7J~xe6iPH
z;2BVFISnx%Wd%`-ouP97ZUO{7$|r6hsX_x8?{9y)iu~`RVfVmHcx`uY6cc|$QTt;&
zGcn0-n<4NH!(2r+>`F=guU7s;7LX$?rtXG;e84?ppJ3Z<SC?L5|H6J>iD6-M0YCCM
z%`s0}gQ&4J0|wVL5RHeFEhPT43yI{^z&K#f2;^bNp}>zRv<l;9;&U*r{oCJ+um5Pd
zKQ<7pDt--rHU&Cs5RcMj8!osVKusHU9qa5ZPyfg1mcKJu!La`2HV7WX#Q?~KE}K(H
zqCMA%g0cM%@bBL1U?(<P`^TIOQ3V5f?A3b<45%$=ggFUic_3!4vEEz5{(Jw+V?+$G
z)1R}z57hvPP$ILFI~rq>K&pHC_s~vk9uV*uE~%@qK;u#{I~R73jR{zR`1mqHe<#(6
zAuxTmR9D3Wen<+sM2%@jDF-|mDjo0pcW^lB7(6AUJ*Wf>YVSiZntUI3^mH&h#@Ep*
ze_vfuvEYaDjis!>W)BzB22urK5@zsZx#-+~6eeMRljT=o+NJ!W<?dbZ!-}UlB!8a^
zFb42Cg#Dv1(4P{RHUtt6Nri$ZBg~v9wlPFK^UtxBAoX1-p`QiX38>0bz&3OHDvqNI
zmH@rz-RpmjeHMnuX*d54EWJ(w@XYVF(q*Y~!np8@_r-^x&@~OHgpG5e=;`_);4NVR
ztQ@K@FV6G<H^~kVY$h@#aWM85Kw|p^*!@?}!T)p!Xe_?~xZf7e+7CBS|JDa_!zOrG
z<a#>bMScgBDN_Opp57PV0mows*t*z*rBVyPbm1cB+pZOV$IT1M&CVM8Vi@(o1ushH
z05{Lr^fkSh2MsHgi-x)Z;JC+=1#<riu-@u`%g;b*xy#xJUATIktu7#Hh6ArA?wpnE
zi?0}dr$fum9};0;c=Pe%?e=(aLQ4IFh~vD}#&C8Jn^DX0tM*7Juf;|4bpotPScQXJ
zwGQDB3e0a(49yxhB@?q_V<uMlMXz0T2%`z3KcDmRz(t)HG8>SZwrnm=52^u6?wrQU
z;@U&=@ynmGP0fm3So)Cb5AYv!9e(Pt7rzkQaHvO>qS)N(QU1Xpru1|pGrt}meXMS~
zUCmuxT^-ZzY&f^nvjyK4-1Zqnr#BvXq(8&pWCZtq|KPU?+<5Bj>Sx@NzM+8$@gcBL
zI4SSC1uvVkZ{ECJ7*x~$91gb1O@O-9?=18%qU(S&f|2L<7oP|wDUAY8Kj&3oh?=&M
zB8L?jzNdX!fQNc3oI8X4xk^3C6#oH_rJ5+gqi4@fKo0J$xqAJM@KaD@S6*uaJjz4l
z<!gL)xwTVUW6}Vsi_6`94!~DksiHFg1p?pixNwR+ukQO6sLCGdlsFUX?I^N=^&Cp9
zlM45*i@~pO%_3g~7#`{rGeU^`h3VfbyQeG5g%B{(@XZY43P9HYW5$>dAGhgIdCSNZ
zJiP)AZu8uX*SBYGP7iJdcyYo792bsFU3FVswqM)iXwo8M@?natDTaku+PIwT(M-XB
zW)bBjcI)SU1T(4MC~Kw|PS2DMq*8CYjzd^eT-Nki7F>g<TRo4T^Sb=&Z073^E*>&?
zi=SrcvhU=iguDVuBU+RE8&E8$S`~K98vA77P#DfokTQP<hM%}sXu&_{U5&y0!FLDE
zSQnR~LHR>h1x4lZRa|EO2@e}z&Zk`wBa#3rW;>4d^Qte2oS%Rd)<#VLm6Zda+!jI(
zWOJ0;eJ{{p9n^zV7K1yyv!sODgCfsb&tq_nB>cy#>*o!iNrue;=idZV`mnG83{@hi
zRM<E2iq$}@0&^yYfxQJ)hMR2r`8ELL_o0#dPdkmOe8J~A(`?opPT5QkByo$7WW5ho
zD*=W$?Tpbj)3(*&Grnh=6Tq<~cEY@*Lb3Dh1LQ3wiakJvwgIH6I$%!l4S0ojRt4wE
z()f(wKY`L>kh|=M_n(1DeL<<s(?O{6;tYJcUG2Em-rk2MU@;nPYy$+%{HL%|&-Jmw
zZckmuF0xH9Evv!d{lMutxOxghan=X$_q$cO#TQZiOEVeU)!i>e$~{1ybMB4&d<F=+
z$CttH%}TETWb6i@W)wID*H)eb51lHTvc_(VKbZ^RkH(`}uW1Zf^6aS{_c!4^P+s_{
z5eN35jc<zp?ZNn1>>aU2I)h84Vw~i8m|$cJc-!IG5kjA3Klxr=99@d+g1W-7=er!f
zJ)_%0_a32NmbZYxd3$@oXXtDR_>muOU!T|j6l!z^B*Ooiy4~;Io3-CCSM&H9Qu!YL
z^lbn>6uX6o-dEr#uK?~RUfmCDE5s4q2EVm~s@!R#i5&)vz7({o?1*TJ8Q|W&F=6wB
zf0L>xxne6LSWFa6#Ulx&#hRq2HAxVeSeM6^q8-Au98I$zL70B(1Jexo=FW^aim~<G
zoue8eyY8SxQJ+&*Ms&wF43wL(tUyl{Kji@2E8bvi!}@bOBkpl_;FpKWpy>3E-$$S(
z0fD(iO)?+=xBZt16{IJ~n}XRT=f&<d>W{B1#jqeDpLoZ(!|Zxl%sMa}+V((8vEPRa
z=Cr}!aQzgwwyW}m;K*T@WiL}clU@GhK=C>%SSo!fCq`u#!5Md#%&iZ_YS8et;S!YF
zc-DRC)Ps|AeD6mG4AAU#QSLon4sL|+4RBw$l99Oq1BJbjNRNjikR++aQLFs4<<UY(
zW#P7<0P(HwfV%42O6T%AR<h|MzCUSPU*8r@_^#P3B+#a`Hs1J=VUx?wf&i#swZ<;$
zO^EJ^=d3<Yv0Ki4UiJ1iVRQi?Q|7l{Df7=Pn$vN;vv?rrltcarRLBkuVjNYv8HN+{
zlv9?M=xz1^NxJqEA$*`;2galnUE9J@c)h2EUKlbXj@p@hb$y$llhgHKbRr1mdAGi7
zc(l`YY8!c&V<@^U1NgUzK7cDRem$l?7IKORmYF#4SV>!1xv%R5N)0c?!_nDSW^B0d
zQ~X+-!rb1L?pyFnfe&XSy5Lvt51U<c+sj)eb0O<D<7lJqcm*lkHs&C^1}UHg4y3oR
zMwh@O=#msm*_u!4m+(e<qg?3ag7YMv|1xn0SHfOOia>;<dd|WAefcX86{{04F0@gy
zAWcEj`-X{5A<|RNTPL#AB%!Qg;ggV+!<_G#UD8)^CPNUSi6ZR>Qw5FQ#WA9ZEc&cb
zkOS!)4FLms7Bt2J*f=%gEs74*ITATVLs@j&y)(S%iK0nyDY1J1MSuOyulJBI0jwCO
z91Xp0bzmq*fOU86UMGZ#w)e-!YY(K}L#P7fZdcvD7MLgweWHnieC!s{y)M-=++k2D
z6AYC~uYCTa*He6z?w#d36A&*RJ$plLgDV<LJz-FFMshzCd+7%ejMiBpEU(jvhVZ$V
zZQvVtFgpx+TQ{lXhKcMxjCa8jS37+Pq&SCJ5YDOh;@9E8^8Z34q)f3h$qP1*v{kC)
zMR&Xm#v%w(!)CrWfz=V=`lW?-o!WDi4u1UOjgR&K9u{ERKi=nxxN9`zOvD>nA(;he
z@1HKuq=u^aASES^iS+LvpQKg;3uKnlEL$*xI14wS+N};{YSl66X(j5CvUT$Le_6G_
zTHRZKQ7BlBE(<9@b$7T6$Guasq=McwXkREx_xP%pOC^XQEA-A)60w`qDUmvfXh9Hz
zL=$XSS@P}e9h%FSkZQ9I!opVp!J(x@CbBi8g=swNI}rKcR9bT$%qYcJy&3!QG!m#k
zrt(!V2hnA^ls7~W<KCIKPl($Q(Wub<);BQ>kK~~a;Azj#W~_b&lAUqc<Q;=|3}d&8
zgDWKCqc8isX>BFwc}e;{`Gh=81=L(%CogaEFf?M7iDCni`Kdba?XP*#ui(amGr4l9
zBgUfJCbB#<!!DU!6BYC<r1C+Oi@NUL9c5`;$+3;ILU23SS)3$!?q{(FD|CmF-j0q5
z5g0XwviIGUDXCbfNc7lSj>~o5^PJ5ux%LuD3~`g{9De%QJ4#4VvXi9szIm@#w8eA#
zW6_R?g2pJUFjmRlw<P)UCwVjzOzLlcf7r@9KAMo-5y2TK$rK#RK+Huc1W_p>98NCz
zRAfH|*W@*sc?|LQM!_xLN{L}t$%=KbdzFmG;0BAOL%!z0haTp1zKyw)vPFvy)|)g>
zFtq&~%X}H2X<Y+xlu|JC0B>b9BSox&Lc@{74LWHGgS-H$Xxm=wkpsCe_-s-|tyh<4
zZPbP(0@=P4&+vImk$CX+?{v0A*Mi0LfXMZe0ES{aR48mhKFzTpfuQBI6h}dndyXyF
zZo%&{K9igg&YFzk-x(o-l;wyatHL^=1jt6%=c+7-8=V5DTm`D$xgdp{Ors+6ra{48
zw<pZ=mN+?u>ljhD9CKC|@k~1)9vh(`f<dhKY+5KzEi>3&pC3fK66Zne(E<~|HKYI`
zS6!DB?8*Y8+MwHgdB7$#LFIg3q`==2KKm$`C-J69&y{Q&$rF4FitQV-B<d#Q-5*XF
zm+qU8n}0aVpa>C1If_8idtC$Aw(l=eogXJ5OF$e6rH8CYy?R^Y1=V;{LElBmIVe!n
zWVNj}9^iT<g}rOC{r*1jHApWt*vrtBTB=|#^N<01nQ%%J@dFi6+a@q*u>vFK!EeO_
z==zC7mhj+NR>7V0WCWZtRBX3)ED)%`DNWKAS#*F*CzZOU_i?`H-Pg<%J3N+OX*0GG
ziYoBen#Szt17qTqP&?;+r=<!eJ>0}e@xcUzi4~!Fe7n5Rj9?YBt*EDCy!a#UETG^z
zs}92F!W>u(6l}pYKaDVK5^_E|!#zN7{sqyKdcVU<u@QX`6h#nT8C54w)n;}i`vXSe
zHYT2}LdSK>D$q<)hkIL&0ntiQ>jI)$*u0WoNCWI_7=#GH;GsM$){>gmJPh$ik09<+
zJqUf`pGq#&yJ2$dbv?KL;sWTV$Gt&Vo(t19MeyiA4@?884z`H$z~<t0PWgi<nl|t8
z$s&m{<L@l{#3I^YflR%qAlL{__E$Fvda)d!lO<jJ|JKLsIW?RFKcil$g*`G-U@f#0
zAKanHxn>HD>4-Q>2iBZM>qK89DVUu?Vuhv1@>8G&!Dw+-+MQ#5g#Vp=ag_Rtj8hk>
zI=h--8FO_(lRmne(FDC&YTsCsD(D0GblaaWqwiU8@rkoT#AQkE5y*e~?)VZ1`9@PN
z3-<G-g%m-4I<n|9a{-wk!`(|<iLBZvF<jb_(vWu&-ve=_7w_4GCcErYN$D132c74-
z&0sh&E8)CdnfJj_Z_V5d0+!6zzr^LBL%}(Vq)#STH{QI_{L!WN^rB*-fcM_?2n5V(
zdesnvN0EwEG~wd$>Ns~eLn)uJ?HQCnModB$7ey2d6|6r?tA;RiL`mwsyc6xDKp>uF
zYtkvw@c2B5(YllFGcg`KfMz7BKollmnrNw-#VYB-{2IwK{mVDZr~73`1n=jw^k5M^
zP1KIC+z%p##CN*pZQ#^4UtOz>W5Ar+pGDsoabQ8CguqN6(_K>lNedK{v_vag`io#@
z`X}bfv7+q2sV8*MFdh->+Q3iu1epBC#$LSoZNvgl2ac>V=&Ax;x?kpZI(fpZJjp~D
z;z{{MgXuLTvLBDk1?Od{pJ$M8S&hw0-iy+c*Bk~MrY-t`I*^53`9MpEtSZA_pS_FL
zV73B#i>?$49UCrVz)c_-ZqG4=`U;f1mRW>CZ)LtF${gPQnko6n;1l6+^sNUfOy)+A
zL>@wEG3(#?5H4#OHFLj27O7v~&XkzpJdW#Oji<x9Ax=&>_Tx-S$C2pThGlM?Ruw><
zJn5uL`3VIxw<ww8OBkRzQGvmkzZ(%s{uIMOc#_P}Kx7s8xrm2@e!tB|dADPVww8;s
z1#<eMiS|UIv>s5l;?nL>`V0c(<L51Jjxn1s<&fMm3g+byfj#fuRVf3BsTHR%zTF$k
z5azptqqwzK9So_IiiSdji|;>%$qA^0<OZ(t&bGI>XW67|bIax&ay{O;7Z9t1s1Rf(
z*X^gI4Um+)OaIJjxVP`+5Sc2<kt#_07i$f8Ukizd=Z9-Ju#VX1ZL94l$h#)_WXw(#
zwfyyf3k(|%t{f`&nlcCE8ia`uYs!VZNBHi#hUKW;CH519UcXK7Ef^tzql^m?W8zz$
z*3}Jx+PG~JW>P}VVBZW&YL!^&cbztw5mdD&4YEaSy6uItLA~sVdnCNjYzb1m{U$S8
zfzp^>rmQT}ji6x;#$U);rC2())LjLty01?Y3h2hLj)0KOQ~mnJi>I<IlrQ<DNFbh+
zTK+oH`(1S|TdFlol)<4nJHnLM9~+_*as-0jetmOop;uEe&e_(F%p;O^L{I=qN$uYY
z6-gr1n&sb_B@aoz^E@YPk2=TAfMx!7&&Svgu_z~c^<XBJa!;XHA&Vupl2}e7zi(e3
zG5_1fQse&hl*oSs=z6vF_Db_XNE%E84oi4{rgJt$-B+sb{MOwaXcryYR?s)1Ge{E9
zH)9{>KZMxwju1nXq@>N8qkR>y5m_>)mci_wmyhKV9+q;d)8FFu6tWds!0(8l@M^S{
z3Ux%?8cqssF{t<UiZlzw+2);qZ|{8OfwAsMojMf+>Li`z?&@t)Au@Apib7u<5<ZdE
zIwJEw2g!Fr#$J{>NW{(j#Y7#NIA$9Psb-hvV)luQ+Leu;uK4kd&pzPir%Ou{0*>|L
z14ftG=fR21^>!wu>zef<M)x!L)^74@S3q#wDoEy1%!V#Ry&BI_YfYZPJ-@uws-C)a
z=@qlgUU%k~YW2;Rmu*utY%OrgWo6D5!`l??N*F^$-AwVzW}SsBfx~Y;Cb&XVg^O)A
zso<0`o82hbtwhOTtTLyKnt7Zo{5PfgIqz9`&Y6;@;ZGs}C%ga|8kxTmQB*}gw)Bi7
zw$yuCyv4n|vt8wsDL*ZTr;M$S`+gZxJs7JPov^VbS!S);deif8@nV&btyJZJ%(h(n
ziv6g`%Zr`Nt+~xW(B$Y1iCez+@001Lt`l57X)~oERP9=lXPzV856(AiQ>vd<`lTP}
zja6OJsMIm~>K>lHHV})Wtl^u9!`6}J$hS8JsqmvzQ61?|%!Sj42L$SOmAlxOfAd7$
zBB3=;9Mhkzp0qw`aqqo*{U8R8H3eUcX!X47Z-Dv!8?Fb;VCy5t-t`y-cQ5<Jolczm
z%<cMNRlYsjEyrPakWp^f?!mn={5DzZxv_GpL%y#-AE9A(=7Di}bb|NLIp6f<b~Urx
z(sKjt>18%O)9r{Po2xUkP&zc9)st!8)TR#;mWWZAF9w#3?T;sARP>)p4HBS-2PPcU
zr|+^FnQ}C&Ha==MpdkBJtYHXuospTSc+N|{Q~$Nj%Gg5Cq-UdjroENG`Pmd4N}w+w
zF7Prx{&3iM4~<sMkDXS{f6I&Z<!>{dxd68pjfPL78|R)+YHhBgtix@lRPEo%wsH5Z
zH~JzP?dQMm+T<)b(OHkCDW0zDV}oFFgg$D>D`-DmuABu1dWCev)~W=m=7B}*7H3fa
z2QU^XlfU<@3qAU@DY#~%J$-9wnnh)7v&mG%YbSEj4IL$4ml5Nk4KI{2n;!EB+hj`c
z^xUW)-E6JgLnl)!`%tVi7%Lqh!)!t<Z8VFM51iJjWZU4whsL#Sn_n}_d>)(oU!bv3
z>?)nDYi6_2+e$Tz?zZL+YKzRvF9ds<_)3?~+H6rgUO#&7ABK}SRvY?mow2jkTbwm%
zm3QCj$7}wQCgttj6MtyTc$0=&aI`$4?w}~46nUS0%6|DZ!_wj2V4~pR+n5-h^n^3s
z(pF6(-w<rH!I1r#{acaEbdk6Xc2~;XN~_Vd$m%Rkuo#2IOtHW``TCvg@wH8=P``+S
z04Nc_@AD66|3J#X9H9!BIi6I^{zRlzYmoeiIj^~eXV%Sm*WR3Qq=2gaZJ1EAWB+qe
zr_u}(`{7trG>&V93CZ&0H3y|%M~yw>We-k|gg0iJ>g?8`=q6FVZa=DzGT7h!W>cSL
zY>y7k-V^B=bn7V1+t_PN)1Rc-`Vj0TJ5ZxvXKjzuZ1eVA+D0IB;hl1bG?CFp)|JX}
zejSx*RB+qZmw7V=EcPB3sY|EQog#;OtniEMzPsxu(er1G56&!nrY5x`uGjuTG;T35
zGVT4n!LI_+Cd4j@zs}P6t;o=m_!rTFEt5>sRTUg2uW6&UJJ|Uc4)+fpoVkoi&Ka@1
zXGhO=YB{ZCXF?$4WNtol$xp`;n-|XRyX|~6&mJJM+}!w9L1ld0-+q2rnsR0-gs`NU
zZCi5M;~&W#Bux8#j!>_(&GqFE)vJr~G$wDaAwDpQrBf4?vIbPGUVe#ID~)H-P{mlv
z@F_O}15G18#`(`1E7;ogcOdRrXWP~6rAO@#c<P=RinPSh=p|F1M{MkoliTsXI7XNx
zpoOw-{-bgKKSs0s?aTywD_NgwW{iW6fgV%G=Hi`)Ck5Q=)8Q}HN-H65W5}}~<&C1@
z@iKNoS)+BXwJTXcD%d7H7e-ULwIW+t)Ao+cbvy`1#->l~$xXN|)iZtNBI)-nvJF7Y
z@7u!8*AXGtYI0V$SG!#2<y%OUL$@V5+nn~ry>>t1g}TGi0r&UtJCLA6?|$i1I&_31
zb9-hwO-ID3+qZLiG|~_!#@iNb+Y2R@N+ACL=UF~)R6BN!d+%K1aIe&R7I5A_x$$I1
z1G5F=E;d)tFU8{W>XEzu-bwxFoAXF<vK?9_#K&!A;&MYO!I21CVnc&*>$IUpJ!h6B
z+y*7iPH<S``e2%{%zh?(PO1K3tyaatH~YmU!;B~EU1V<hkKu!DQ{}B+ToD9x6P4nw
z@X474dB$II;tb&jF*|!LUpKzh`j^g>Xnof(6(o8_lR%+3HWyRoOTDl?+SsPLz`#(K
z;`Mg-v~eZqKC#=49l{Y>X)*Az`?!mHvzhzPv-i_=I?DE!4O9)LuwiHgugimdsM-i-
zH>O&z3&hh~Rk6AFB3(Q4gD{g;dB3-eV3U=6Q)_xWF@?{svYfNteoxQsts<}8GW7fL
zteSRngy)Mp_TNUP8YF3gZM{<7Fc`Q@8&20$#Jd*jZfjp28h&pvch;}AQr=jD&bv9i
zzVmb9D-}X~;b~~AG1c~+(D!^)yz7?P7Hl<y?4P#<w2LER%(BBRn{2#z>J7C@^bgIm
zc14{Lj@${<&3zir`Zx0pTPZ}x`Nlp!_<`_;Ad4SY@h}`JllvU?@84i&aPw67MzyEv
zV$#OuX4kZ5C&NPxA-V?n9b?yUust^iDgiX)vZC8mT!MaMb-AfG&O&8pXx5OmpPG4o
z<$rRv_Gz-yoQt@8UrJ@rNLY1KqIrCLXy#e2=iz&JZ>W#5NBCnGqb7Ncjr|`ZBUD|e
z8h55{-Er;^u@lgnH9}yMSBMwObZCk;1*b97^q3RzMuQ(=;SVp|y01dby>;z|dN%tu
zeGh$G@7CHzH226QOi|eFOHOU@$HX{%zK>J8Bk<F8{(CNtWoO;)sXSks#8n&LOrYo+
zigvJDauQpP7sX-3^(ul(r0|7z-0>vtfo<+`=7Yx~S0V5peHY4KX)J5!HWaTK5(+LB
z2^%SROy3&rEY+ALbYF;`-B{lm5x^7m?0_4uoCQl_n|csf_0kA83kx+#oTb&=OwA?-
zVJ`izbuG*tKppI|KgLVJd$;gnJM!R1vAnN$0jykFO9xg4)`{#>P@{9}gvg?R8^7H|
ziEe*LGXe$QH!iQl9M+Mc+Y?oFu3k>IlQc6-pByLNDu1WhDmr<5Z*~=Wb;qc+t=wI)
z^YIdofD20<-FJIbR^yc~k0-kU*yzj%Y`4~zMkHDdE+2Pn@o5{oN5OY|Uy-<f`fj3g
zE{^oeIKMQTE_lL**L?1!Xe9+cCl0fd;=S}N1m9~y!W5|NZZf3i7-`f|*07?j$_UPr
z2J8u)X9O5n*Y6d^-598_(HJOKk)w4?$l#!{c6*}q1r2*(v|oj`ehS}EAHQ~Ud=-Uu
zQ$b^j0o!+#EwfX)GPABk-7xR)sabYq*Vkd`?L&A?#WR^&l4(DhZ4-;#H!dGc2M^G+
z8N-d^7sl0{=WmHX*V<f%&&GOY4wiOATMr4{8^&+4S?e3RcbpfO4FwtKu?A@cUe6C6
zS(zK#%=ImYq7jKt4Kbyn-@%-^^}L8j4IB<iBp`U=QJ*P?f?}`Rf)SZVNEryT!QAd8
z_Q%LdQU$L<b1{{l^OG{t&b}ei+KTC%Cw5*%+K>F87lv~~zi_0KUoi02xw|f@7}iIp
zZLNRaS|V(a_zbyl3xK~`kE(x&N;s=iO(a%KX2a2T{52lt$Etr6&rASu3;vjGIHR{d
zZusj&^T8UzPH?rT9{y6XV^GNt-9K{YH&=Qr4+$_|R3A!w!hDh_2E*ajj=UoNnx|hZ
zjtPEtW5k$&hx?s`EaI|9&sHAbh*J*dW>Qt?`D`42*bNej)&8{QMot%m3bSi(n5s)v
zq5*K&*oTeTs(4q&0|yQozHyq+K-!j`+}3Uqx0KpKJJl@|aOFHX(HbcL&u5Vr>nKD1
zcHjg$s{_Ok30KZNH``PWte2>Dq6-;mzXWO6R!_pvRr2~ie}o>^*aZL<SC^dx6)n4;
z9f58GoINN2vunE@OI2=D$xxzZebef`QlG7up_a?EsMK94nK*q1P2tmN5_6x%6BgW;
z>ozw~AB%Y_G#;mF+6Pcc*d289mg;U=cNcDtWuCc>c&G5V?{G!CshjZ6QoVOggU6+h
zt`-ZLH&nuPFH<h2Iq1<5)Lg?xm(L;`xzI}~d~p?ub{-onpb1!@mZSZYB=2V^$XO!9
z>iA}Uj#XLC#B00R*ljdWu<19pvtO-WepWi$dUbfoGd^Q56Tdr^P<GkR-9A_sW>BQy
zv1xy#y{c8FH9%d?>X!i$8{U=n^N0LXOi-UAKDP6lo~?}Cta~B|9XWHstycNNj_v;Z
z%78GB^sQ2xX%Tx3n&}Ai)Yrl3f{@bVAG>~Wsyx|axhKBsSp2h-Z4rZQTRBaUGa;@#
zw@fweo#ANjCu^qdP6^H=qa1B`YtDRAtyX-6a?w@VGn6liwz?vgroHNFeT?;!=VMAA
znk2A|tjl8N1%i=CB_y28^(W!v6g)?Ce!#GCj^!X6xA{NGN756wg~%g&o7$q%A4iO6
zJkfKTGF1%`^O<fwzhPn;AKCTwo~;eWmdDInc%_8^Q0~}zo^bbU?<3WB8no6g9G%po
z$yhEE_uS%WS+<r;5F1`UH*#-Z&>t^lH%VwTe-8(fVG#o6t8nsd0%=e=YsRjsU6V1R
zTP?UDseJD;^nB(T)xf6YW)(-?RO#H8Uw%Hm(_~J0&--;V&L72@hNLd9_uO{(OY*fC
z`0ivfGwobhk^R)!*2^U*-uJqar+m%k)-Bn&x+l(8xou^4l^gurwfmRvYxlo(+r^1N
zTQTDI+q~Q4Vf-r8747t(X+J&<K3=UjvzIpovL1g!RBMRc;l6<JtK^lef_<-r<*6hP
zki~qnS`5?JUXYp|xB=VhbRci=qP<Ptq<H8;90Q%G;dggf2&5WP{5-g>Y^^u6@|oPP
zFakcl0Yf7gayLMyOVN5^W3F)kGEk1Ng0M|NeXk(2$WFV+4o-XN^hD)2_5D_*qn}4i
zXSZ@@>9&rz3#+o9v%7HF6}9quMqFi6K-}vaGz>W05o~pI=;Q1K69?7(Z#_2Ir*8XJ
z74?likoG<IRkivOBkJ;{id%<r@2V5duEW|-{M`Lk4JOk_`)T&I>@-LJKknW#tg5a5
z8a7Y~fxStk*?@on($WZ<mPSBQ1f)ZdE|J=FgLId4#|BhDLJ*`Iq#Fe3eCP7~@8>?}
zJn!{<eXjStuKnR0Tx-rXW6e3&7{4)oEK^yglkQS1PHN(*0DJ#)vf7OUT>eE*h{Qdx
z1b)RRuUows&bEa~OAbynSyjk>Y=8e%b_1M+HbfZHMTvB!c4+Qfj1aG%;O;zT{BUu)
zW5}8fv$E!Y;eRF;zQ3hCLsEZwwxz5hrL}tgZN?)?wr_=6$8elXUbHM-==i*|+IwP>
zAZF=8$_o0nyu2Y;nO91_aq-b(O?U>~ZRs}lVc>l!zy78~wd1nO1?xCwMS;IUMLmnf
zc8_I(*1YDBg-?(ZdUk!aiFIKv)T+fjHpqhR+olF-w&N%A8J;MTqf=UplF7pSup@Na
z6yx~x_SCRPQuXIhH{*<%%3Jz1MrghZU1SUG1VJa?7O9X)w{6>2%_>geNRX`fYt>O&
zqSTo>`+lb8XDrf|^&HYFhX5ppd2PTtmZoX>6;yY0k6P$l--Xk{w%$c2uE>Xcm&jz*
ztUl>#<H+v!KfudZ?B_x#3V^L7(3oYP9_*CF^x@y-4rvZYep-TL<BPL>P49M>EX^nE
z<P(%meLoq_%w<eC*o()Hcvw_YR>TIW+OcoZ#hnC{W?tZUh6oLwKPTJqw{Y#t&an-g
zFqYT+{^8STwVi0t@M2$|Llj#0{kkh-kB39EIceP1Nluj8Vdw|bkv$VLuH-Uwt9ZTO
z`W7Kr+A(WGam_|G!yd^6oi}GUJ+n!)MohvivNAm;bCccqJc`#{n<(eZ+RwPJA9Ii0
zv>NUXi}_U8t6MwD0>GOvGuMfba}~yW(gB-4v7dt;!kPHm*)fnrB%r_PH=Lu3J8@9=
zMri!e_=Sz9spQQ_G7vk__h!8+&&adRoQF(+JNvHWD4YD2FDApcuus#HzLY#CQ~QT|
zB^9yocxL<XCGqjMxmfb7givQw5I~*nAC_PII~t%$&U|cSv6!mRqH+IP!30>L7lgem
ztm?yTnBz%#-wd{AXGD4&M-XZ#)Sj^7ThH`M2I#CU468CuJ2;y*xUczW$f+A!sUKdN
zs`J7nRL5jkhBz_N;<B-$R4gI(EsSw81!(Q_y5!^!IL%fnq;<pl)T;+ap^rj7A*~P3
z^n^rNm1G_&SrNqvO?G~Xm#=@bt9eA$9J*5HoqSe5AbwP3@2T_2X8zuAu?Mc!;SQTF
zyI=GujP1_aPl}4AeliD4{tNcYS=~)HBu&U3iLzHFF0<r$y@CI;UGaSHQk;?Sn|40r
z^W=+JL&$AnA01;xSv^P7(g*d|?^VC4x-LC~#ayO+8Bx&3(MLQOs(xB>U#)_B>iN^>
zAb$qvtd9)g-y6k-gaWp7QQ*sosrALc2XPvAfO4!>3_Ji|BiKnI+&ojsI_@!t+JVb<
zEbLYajN|V+PCAKACHQD(tQ?i}kWKIx67J|xzu;3tYsVFzpx}u?%B{!Wv4qxY`ofKp
znn-+fTp9Zu&X#FzV(5?LR9Toa<^v|paOhl6YdpNnPA7VjU}8$Vnex{X25T?OL5#7x
zW?hGGyG<U`kK1{A&eoT5p4N!#{)W@kr8aIop$;ye&x^w+5w^6JA9ilrx!=@3%rUS}
zmaTCg7AHkKsd>vdlpRSR>qFU$2bWq~=Qmi=xYf>de*>jqu_j;ErAEnbS&wse3*`rJ
zeZ@$yXgM-Te|A~%0Uhw;8vI|#Xvaa0Yo08t<-EL>J;Sn_A%(p<%tWjHT5qLy@*T10
z5S!eup(#}zp?BH)Jl!*6t8i!AJ56buts0lA3aii~_Y@w5l+N4ud9VbsVxm{l35nqj
zMA1v7{yLLa;mDoF^)@k?HF6eqT>YA-LE}x+RE*WfbG_u;WtQ3ztGkGeIV^s}MxMe*
zzxayt*ov}2YH*T7`$%{~7M6d4M<1C3AVe`3%A-s^BOj6Jx7){Aj}M;kc}dRO;36nY
zDNfiNmJp{PQlof{CkPC?oW>z3H0<4k9S762zZKH>N!S6HgYA64=>X4r0^X58j)4bY
z&k6Jqspgp)EM9N46--sDZxQaZZb!Ee_BhxNEwO06DyYYNJTAuayu!cWhUESGN`_f^
z@{X5NZOJdmd?-g~d$vyB<ebl~4JsvusE!ymhZ0RbCmZ>dTO<)A?hmFk`rg)0%;HSS
z<1OTA3#AF%N2m9|{+xq`l)p~X@p8Feo0P9iGMb|K*KHoPCjXaNemVmB{wN$Jzsrfz
zvLT20>65WfvCI^d2!fKgk-IrkxM&-hhX$^F;^#%%h^32lWwZP!$+cf%EL8_p!f}G`
zENY{)xBWN9hH-kqezBTCDgC{1G<hL0BuCDXo2dZ*&&m9umj(FJ<ae1W`hjJ#t+>LG
zw-Y4t6~YjL<BCZDXMngiHvN{m*32DUFUjHAyzYA`l|Dp1vKOI4u~fRfE<2#bUq@z+
z&u>~&9;I6;%)2;5x%N3fujSBu8BgMaB|w_Liw|*CO=;67bj|G*)$>2g$Si<M3Zi4}
z<5KR(iV+o$Rp#u!<=S{MyZ?}}(@*|``hk1#>f+kW)TXY#6q3nf6R*Zbw`=4J&hEw&
z$L0tAf;R6YI7XOn$=tR(TwOw3;T5?)wjWODzHe6e(wINMA)jBrI;sFI$=}~8U<NYM
zod(Max(`Wn$$KvT!oo{;Y8D?AS14NKYB{hprbcUeGS+(3*ekgWCV9gPO@h~}rp9Zf
zL@L~;-1Wwas=`F+5s$}v?AfEDtkfO-U42L}7?`X5>o5gBcvh-U)b@EiuUe>DExun~
zUxEqnQB_K)LqBdwzg-rkPiqvT?MY~Rcx@GY8p)<6>jGR^@h8Tv`@qzYbhReSNpO^s
zfmUkIz7AzwcApVv#&CUAp;4j`7t3lPOh2}8ORGGwa9%#-ye*kYc3vc?E{G&<ii7ZQ
zxd|nnaWL%p>RJCzKO@|6<aX*7etw<_owAK5qiV})EwQ^h--!7@G9Fi>1ayM6m8F?E
z#H-8BoGwU!I0xvB@Hzw7QJmiI^gbv$F+zmay1vFCw=J^zWMa>k`zz&1X?m1>(xi)0
zY(H-Wmz-X0y{ETlvQZp<Vr;1_V&Tc?T#`Z@c6T7B;mmSz>UvYc*4@_y066{6shM3e
zE&LEKJ~NGf5#l-O5G5YsG(2FFw{6jBgf^tBUZOK^w+G9L6^I$It@C8#+!u0RIE!#q
zh^=hNXIgIJ<w`M{u?$7PXlql0iqZQMMk=N%?(X#?pb{3|M)^MtlT7;o8b+6!!tqqZ
z)&H5|pdd2M225Y#9&OVsaFV!t53rVgUTY4-2=E`iHmk4xOnV#%^8d;iy+H$(|FQfL
z?>Hu~3=kwc308wqjOU)xyirFFZeUq0u!e4s3Ik18eKSzNE(WUUwi(nxrYS<ze0ye^
z1XaTbE^doh^CPAK|8e8xs`8&|2%%~^k<lxUK{b5=BL;B~K{b)dE`qh78c@si|3B11
zb&p@>Kd}J+Uu@R~<sgzK4kLE*2Y=iYG~Qkh2>_IVu&`$UJdhrkvb*L&GM1bK4%yuE
zF1paj!n}ypG`@foVh9)*eeh!j%#rNXNEV1@l?Mljue>F`yQ%~ZzU(SwxzNIZ63GGc
z#V9aqiw-OT*q~4u{n-6raO)c^sZmq?g-GyC@t^4i`#Te*m<KM#wqN430@ccZm85`E
zbSjMi{PF%(1t_)-JP(r;sPhJ|C0Qk?C=^t*h}lpN{;5L{%(uzjr_5mhdWbgG>t%r+
zHX8yWIG1%49)b2j!7O{Vt03a_nz#t~JLbKzchpati{CLSh(X&XnEXND(KJW*5TJen
zfLo|%5SFxqz2R|G>A?LC25`T2zxoWgAN=Y7#ubONwg$n^ilAY{Yu%=k;O{Z_z<s7Z
z^-meV!#%&2qI(~@8le~twas>%(*%@F0SCPfcd~x~p2`)pr!Li7naayYhq<>YMF)D@
zWum}?nD(GkG{NVgAaq;<ZzQyDzq`^bFNa|({jw9c(Qd5q1U%fIK`MmC@xGNk^quw-
zs>$sdm)oCz`s;$f`VIq><oj5bEEDuy6T|?<#s7E$iv7ng4F$DkXJM@m{Fz-IAW8F5
z_G14o3+AN(3ezNn+V;2LiNDqM=eamRS=oOb-I=7p4Gg@SE}#egd07Vw=-G{3HEU3q
zuN)Ye(q)CrzeeQ0R{}}~bS!Kj>nXIPVu3pZ;xTan|N76z4~z#r$JSe%A_sbI0Q3aA
zoa!O;;qcF&Mkru^NPMCBngun{UaemF-$fy5<iHb~z8~lUb;v-oR$3lS1xDnbHI~bQ
z`_T<UiGRS(@X`kx#{(-6(Es-zU?@PRwsM+=gEsyhp8LmhdA))rPj^LuF?iw*PSD0@
zghS{5DCke2u>g?D75{_$=0h89a!fIF-2dlK3YtR+Klh;6h)BS?t49tL`~S~;`CnIh
zZ9o%;#d;r_IHS<SalPPl`FBxFn4lwWu*s2Sfi|W<6DKO*$K!uLSK0??;{4uN8w3Bs
z4#tsX0BJA$-?ErQuL0ZJ0w}%o8pzn}qmU~JKA-cLydZ5pWWxzkQZj#1ZDui2tyhG)
z?^&~a(BC?dUF2tGm{9q5o*5{cwGpI0Ap^;ar$Bb^^kHDha=8tOOom?=CmXpS{ff%J
z-M=uW_BcuhCfjt^>0~^h*vV0li_$vLqC8+IUnO^v6o}M$szKQ73H~y$#F6!wr_e5&
zc3B?-X*NwjB)WCX;RT2k9tR;iR>GHOc4H0CopYc6_yXmI2DYr_T*Dff*jp*5S}Y)2
zvXkDf8<dOCR0LR{RMLeB^aJPDv*_a9192db0dP($p#um~90Si+_3V&T=WPTvPcP;9
zF(H2&u;dxIq;LV@wC6R=E-Ap}b*Na!HNNW@gvWYsK#4g)pyl!5pLAnDFI&-<DM{XN
zbX)~IGje@In^_p1)k{GgCU6yt{}IN2d%WX{7i3)w!bu%Gb9ttnTB<=HaXA_x!9DF{
zaJh}c+VJCI`|`p%o~oqrKojC+E;YG*r+{g?5y&1KcK$!g4_oHFwV^KAFmkI}+Ur>#
zmq^Bl99SEEz25{1kON>kGxo8dSd$As+8529t7TE<OUnLr6ogu^QWKwizK*W}gv}~0
ztzg8-w-<3(YH709E8LY}4twOMO^e1j^YZi?V0)RPn*+A#6#NIH2Is%v(HyV4OM!~d
zEx!E)$OR6>FdmTQw+66{h8)%}_%4oqfzW9!&>2N~pFA$irCL=JfcIJS+YsoLehBf_
zF@Kh6Xng{UGW}!M1B(K^PfeB;-?<OKc#Z>`&+2Dc>HySfJ4&Op1m@8d`YnFPtc!`I
zpzXbG7}Fqx-55v{UNcBPpXvgllODBZE9cj~AYt5;y*f1|spkFWEl8N5WD7iUcKhB)
z@n*aR{!#Lfx+C@~!Sw0n>CEK@=tCZl9!O8(00bY$9;?FIVnkcO!YBDHTxOI(JPQYU
zkJ5sn^~n3r{t32FaJ-KEY!D{+`WBT_T3F${_vVE4CrglkdXFHC#AgahJrO3bi-H7u
zLSLgv`t@55?;|#(jG=s|o4}*_SJk4_0R8jO%1Mx*`EK76<m4X6Wq-&8v?rJ8No>IQ
zc?jfFwg&mD_uw2^18`~52^2S?D4cEJFU<*LfPQjGdj;mKuKmWB)V1@3!Sbknz4-T`
zn9VduSPt2m^MlxXOVFhK2tS}O%Kx;yKcZ)is#Vo2ECtc~%V$R~!{`|ZFx9HehIVTb
zpS;QNynvFff;^2Ir$XRm=tAHE1A8UvH?7(TRiJdz66RjwOvdDY@+HJr3y4oZn58gC
zM7ogqVW~IOdT$Jv{R;CG`U6MwKA?f(hRl@W=vw^o|Kz;PrHhPp0@3v3*L)7ZD?R~a
zLz~O~drLsmHg?#4j(Rt@->?LNt9O4(V{=ESxTQKXimriK_51e@FxWoXM*{=2F~}Ny
zM5X*@Z{y+gM5xe-$xJ&K-wl=4yVtjIlw+hz4CQofI~jmk372OVG6}9Y8Pw5(%RTy8
z51*s$7|>S?&wB6aMD6$-3$Nv6-rnH^wpS1_ct+6^&(WTns^`2IW3>KIZ4_Adg2+L4
z!woep%b0Mxm!Hq|kHCyL?%*6A0;X15?Fv&hTGow2o{$LT*S6pt|7F>>yB=Ai96W`%
z8HUdCVCsb)@|NTFNzCKlF0`EX22SESIj&p1WOVjTAmRIk>(d`>{>W8dl;H5M&tBJ~
zaA!)?bu+;t4{g^;U}LbAs2Bb37DYfH;zVPZrz5$CWqSn<N2t9o6&t2~6@G_yF%-WJ
zMW01Ik_!Nq@TV)kyr_4O?!0E2aIO{&7>U%~tuCNIiEz)dK~$*ex!cnb9|H6Baww57
zPJvTZclh%TACMS*V&;0^XMekK&&yN2R1#A9Szz<jg<<G?9w`&}_NgY#d)Rv@P=4-R
zHE=*0hm;`g!&%aVr{E=5(R2s#vnbJ5pIyKzwQCFe^3JUmq_A~Ug^yeUH!)pRgi<(n
zDP80*urpKwGo>#ld#m%8Jir2~7d5V@t*!*g4k#kRvuN3Q)OGA*k{!lPFL=dp*_!<6
zpKg_ghSPOhhucc=q}=lId5Jz=N)(h`&;;4}+`l2rz3Sf<vaSWX0}nKA_mj6!uJ>hB
z+B_E|Vp;CXmL(O&^Ra{RU@{`v3qRmwLFyGNu-=VP*j^@D)pvG@0^?#ayb6|^_*5Hp
z_vX9F7|nv5y=b?YM<<=;Lmc|085t-D9)^b%Dj7ajD#^NJ$Hx?=zdJ|h+}$YVVhVF;
zkevN|8l`$@5XyUx**MGe<>ynZyQp9D>m9t)dW9_@DR6HuNBKdD>();GP)8F@6rrF7
z%%R)d>65Z-gtMIZC>!gqmyZTCSW%aXy)5HA_Um_eHbe~?I;=gc!ewOV1;6)FOe=JP
zwUb@+qzPJh&cJa-{x6ykT9^Qt_w*fp{-BTOZJ3b+C-!?pLNN(|Z<R1{gfRUpH88Ht
z;if+weD<*!Pv5JQHscY|ApYDf+V_UC^czRv$%THDN_s@;%XLmdP((fc28uER1&c*0
zF|6}oqG{Ni{D7sNbGq;2Oy6rKn5jSzPM@5fS1M+x`1TSkd5@xWFasgCJjuHR5vaGf
z=TM1s9VijHsxVB>j7$_ZPWkOVuPfeffc#H1&Q)0jR~m^kj};-;!9kBl5wGq;)r@Mp
z11k*1&989Uc}2bHyYe*B_R()b9e({szbMEpD<eo9koJhHEnlwnMQd!e@CD}B5$Z^_
zQkAp60D+F0Zw7-ieYsA68qT#3b;pPW=z_F86)?0BJ(J!{l1T2zRbbwBxc%VB-&vZo
zc$CZma1)vRFe8u~bk9H>1F->9>c+_km1+WO@~C4}Fo!K%-=7J{%>2*IpG<mH1-!&n
zrqJ)^V<ccla<OkC=NZg_Zs|8&p%_9K9^UCg`6_IsCwy8yi`=e98+JFo@C*J6N#y<Q
zM;Eemw(y7$6kc<AgRAr?LuTQ25ZE_9e*#CiO0vHO?$5qLfYI#l<fD~0>tliVc&`Hx
zDwz)q?<xr*Md3Sb{?0;YYRJ(ys2jQ5eyY#6WTPqb&E2j@lF<_ygUYEloOAL<vHQJE
zn1#*;bX`^%CV(Wh;V;PwCK|Zx|B&?PgWUlRihKTVXednv2N55c@6!{?slY(D!YzsD
zC7f`}qgMnoj-~qO`<WBgNL{U$g7fGQ-<?>N9!QM_V)AEUuBLX}@sTq@a?uvlvf9R+
zygVZ<&U#EJF1d%m?ezLFJ0@xnI_vvGU?E)0&E-LaZ93fEUv-hI@CKNMZhhvj%ts-i
ztk8`+yd4!=wez7uYb*x~Kb(HUd~{S7{7l%9I<S~R=PVlA;8>L1ivO}f<Kq%3nJmrw
zKywk%3`B&HEJzAl>R~+l$?&Mj#6y-4UIQU1o?LiUJc?sRQUmTwqWn_Y`rc49N;yMX
zdO$*B_aX9b^M^zCkGZUXtT4d#R}5#7qc_YDW0Ybu7W5G&fo1!5%iSACJzhND`}UyZ
zNY#Al$^E79DFJJZu7D+pW|o+k0&{45UNVCtThZT+Y*JBI*l*7I4L-H?@pGFMvyplc
zjLon7DBv?cP#rdA(NU7DiejjV`%o$eb9R{Y9mDedh-mIJt0gH3TLRa^eP9HR9Q8d>
zLtQZ>eFP9`e>b6y?eZ>`MQrq_2i&2flm-ttuu8;aKiCLIqDYkQN+06juA&~h0`a6f
z<<pEYSPIayh|Lf!OTPMYckHRzYjL^_(dbv<FUw>f4So*-@{_AvKSh&y=p{E+n+FT%
zrXr;tBjI+$F>lhTmwcB+6ya=l<g9~<9fFDPcDn8AwLahYa&xiqIjA*9^p-wp(F;gq
z_{6_SH1P2@IlXOOZtR97`GXBnB7+U)k2WvAE3&d;@*g1%o4s)Md>fv%#$zfW)uOK|
zCmcG%947rS6)@PFN$v(Wc<bGd$O0nF4yjgNc-oo8(}h?^nTO0@1jJAu4CQu&7FtJk
zHKn|a&UdMR<w&(+3nA8bR|ci1QrT*=eRt@Pd<59dWVOE?y0WC-4Gt?>Ke{gqdzv-2
z^E81eHm`L4C8qKgYAIcUoZ3ekmp<boqB#^CmL@X#nUN8+xlgWK`$81rM8;_nzRmw!
zU(yhl;QiYg#ozaV$RdI1kI2GF0x<x?kt+Ftf+3o3vt6^kpp>IsG)D@-y(AOZ^WP6H
zVnY%af`W|Z&<l8O){4aX4y|P^sgfRMVz`hCi0V1QEHX*vroYvv(QR%LeB*_r40nW9
zmh~=_@p?ZlJcS}-JzNRAv8$1T%!ZV7Md21%FW9%GvVof@<1Ct<QeRRS2)FcMMYIgD
zgPO_~Dv)~h*m&A`X7?iPt84&+AW=Rs;uO|{th_fLECohyuz%h?v~<2v@|+J3{xoTz
zxi>uThwPpOxX(O5$gptsU4&CJODRf-KO@p5@vM(V7e~0};&<xhQI(QzmJy2Vp#qV_
z_9OkcZ{LQvpi@U_G}PJe|9isClBOAaB^Ija{b*cjnf^U0kFcNc_;Z+|*ys)3_Wi8U
zovTei@?+}NI-3>%83p9F*xFp7?gqJ5@GoZ*I;b%4GOk?}Go$Zwdphdd@dN{oz$k-R
z4$91%?}@!7dGt4p3!?x`7kOa;M0L6ia9_XH%zK*Qvd9$p4GFRS@K~upHYNKk(MUcV
zo-JS4{GiHGfYd_-k#sM8GuQ+&P$rLBv9F;L>@am&bw1o$xT9n@*pRtR^G&po8&4_C
zfwF2c?*ZzJ_M9(iQksZ<{i<~9h_j$lFlIL-KamzYqM?s4p1khm{cK)0ORgoT>hp;g
zV_$mNBaj0=m_w96zx&G_WEvtg;$sJ$Sw2|*W8p9S?<|JjT9ZSx!3~k>&Ee7VYuC#i
zGILuA^gGUWsSu0N<}mlgHxe2Q1P++ONdAaY0i3_oF9Eh<bF@~+&hTmLvfeuiqh%5=
zCXgF_a{(OF^WBW@bm<Z8Hn{fm^JNP^afjh^JO{?to1aK&XK)|$lFFyZak3IPbgC6K
z0pg72+9f~+2F(5oDgc3n4-+4T4Cf=R$w1AQm{|QiQnCw`3jMc_Klj`+!@%BsICzN7
z_&UdB1>@X@?{#`FaEx`m+A({LGw_ly9*M(P&YnVpm-{S!u7shH7|Z8bpe51dvY24R
zdm!(~h-Amir4fwW4~nKadBn{SD@H3?i67x5#ff2fpQ{T5o`(BLWs#LLbJFA9y6Q7-
z^h;un?G~pGLwRssCiqnErRKa62Pt4|{mWDwZ|}&clJ(hjlKl=zn@9Hl2<I4N$y%C`
zc~G=K$381rRAY1PaXGe^l)JDY?KPI*xDF=eVlf%(XkPbUnm%kL1}PB{Wp)&JIQo|1
zns@Q0G2Lxa!zIYq=n&tU1HSYv<2AET6n{PxU3hK*R-9s<NgFZco<jasqbKxNLj~|7
z^s>_JFr~`d1Q1`F^~sRgi0aJVJ97Hs+5}ksg&DppRf>9S@MWjlqTue)!8+vdu8<g}
ze;c1<TYpAG)K1Eyl+t-Vypo^i3PFq@qT@<A`yif=a6fw=bN87Aov(my5Eatf33;&_
z{C_;T?FL_|YDL|=UlO4k(3(zbaP&itkLns7g(^(9E58Ui31=ocZTvMWly$>)W|U-Y
zG@DE=*6b7UQz7G?B%e6CqB%iw<p^VcSt;U>Y1Fv0l-M8f3tXv9`h!#`OovF`IU7&Q
zggE}e6FM`|`nMl%hf{RCK<uFmMF|imugG`fIA0In<oZ7M-MC@oGi}llmbuTJXaAe_
zCHOAeb6vW?hgmiR`R!lev%fG?;CDL`X78%-s?v=s@FiuINWMgTzI&^jMEVXH!3UHG
zf)^*<*V|Oo=VdvY>?i|W*8lt22k7p@tvYYb?vnMfQ+y0BlJKEfO++plqThVG2Nwt+
z>l5lD)TrQXv*>;fbI9^u`Uq#1h-T`Gmnzog$)KGVKk<D7myVYSd|jMMSM51%j-1?~
zV)bd$kfvOEnecyv8xY~)S62OjwMJ8zOkmLv<J;#7G}Jzn@KmHvwr!;85Fzt4RUVo&
zgYx$|v)CH<YAq5wrym3a0jbQNqg%7Q`QLCaB#rpB3gg~`eD57)1(gP07TKsPRedm{
zKGjK1>+HAykv2@4H~EXwQ(sjRfi-*9+hk~wW%^eke-%iB0}hDgS6=9VD~NEfX19`j
zS6zGh-}B=CT;&Onyv6NF6GN~L97D3(%OxZo6zlT$pH~JH7xz}><1Glr69o|Ab4>M`
ze<O@n;TkgFi{inbz(Zw50GjhX&;j(|-_RN8zr*hVw3GA8suB1f1_-sM&(*;PsQo`L
z8-s!gb*9$<xSLiAp^XP;rhEUo2B8RpaLiW;oKgV!6rcrnFcPNgg9iWOC&C7h(+5jq
zLluw`DFJ*E=LD>I{JSigYh3`VAIN|j?llO3**!Aq0D%5SQ2;{*Ps}1`;}Q?>QE9Ml
zKIk+Z5rYoofB%U%fcuT&YFq%g%VYt%%U95otnr`MfKkB!9r6CdJzRiC(<nd$M<>$c
z&cB~au^Y58(Ai<%0{lxnK#k)}GwM_ST^8!IE%ZIW1dT6^J*Y9iqCe)}MbTU{1y3wX
zn4}9~z>(kv8QbywAOF5ATnF4gwpcU`3d2qW-KF1~D)DEd`1_SNo`RvH$&vO3cytFL
zxWkzpPUiRT=OW?&ZG4rYBM4#UIZ&@8<ZXel;C~J_;^p<5w@dv&E^n*4MUn{s9zDcF
zfAPLG0YCw3;8s)Rb^TUg^q;atC_q)3B>ye|qAMT2?!L4BBi)<Jw7(VjJsU!4U5bDe
zI=9&%QzD=J#}F#PaVU$^j>lgl2h&YZ)Yr#BE;s)<+d+H8t}&)KWlRAJ6<{AM0HKKh
zH0}a+!TOLN5cpgb#Fs3j{S(mY0fFRU&OiQi{omi^v=y+6qDOD3yw&NGtD*w1ElmsP
za`xM*HUB$HaTNC%z?839n~pN%5;!{(c&q{PRh<asg%hR!(_P{3z^N+M|KA;q5{ZGM
zLdroUG3mo0fQ$%4ff0kt<xqjB%D+Djhy{+gQnj%K!IpDCvx;V+W|T4j?{fz~G<ZA=
zWa0lVn~2m4W|X^9KbN2`2+&rMA@G6}@Kx7W{QE+~;LpA#7cWU0cE#GZa><_uf4;%*
zSoYi1K7TamZ54F&-(H))zbN2+jJndiH9k8~q>rv1+fZ}oy~fcNT5++?6ZzMX1Z29v
zLKmj1N&b7p!=Y&By|L#9F+j-0kT(L}K~-?@)B!ns{x?>m<0`n8LgX@67BGip>b(fX
zkI>||U)qOwyubf$h=bSLaezTTj{jio1%^xvC^5gMA?1&f?7#h)mIl49R`r1SZ`TzJ
z63K^Ff}!{kyML_&L`0zCNx3*5ovqOVV>r`u(AC*|zg+<B`H%JWJruWqUX3j6qIBJV
zcr6>Oqs6Mmy1oBe78Mo2W83oR03OkxBt<+;6}(U}Yu)R0|01EV1Hof&<;a4jYz$Rk
zM&Acdp`7e2Sc4gT<tld-G)`Co`BY_hu(V6<Nclgh$Gvjn6a;PyU~EJZKpp)Uab9uK
zqefRUtN{0<NDCgDxH&rrRD&Jxj=Rd2Q;W<z`kfGwiQr%BUMpQ%qukc8uLsZaAJL&Q
zM5%t{C|M@)A^_0?mRQehBZM-9h=^8O*UF%7OpC;*tjVYUEYqO<De=KyobTT`SyAXN
z+V04<J7G{Y&?W|c|97hKdrUz8pI88||C=p)Rp!nF>|BH`CTrhRw~?7^fLxouU4}u{
z2a``{yFt_i2~a1AylnQts&2-90J(?Q;vroVC*_`&om=%#N`t9|2!|$vs`M%!*yi2?
z{k!ECx%iuB_+A#COdo)Y$WV#i%;5!Ccg7L`Bhsz?>uHPwaCRC8R6tGlA)s$wCG**r
z16pbV!1MZ-=i8oLO`ZVap13@pxf~0=%kixVN_mS0Ze4}Oy(uq}w?X!Sv3v4KCIBTh
zZT8r%oyP+x@#zL=N6*@k@+Rcu0aPp;XyDMX87_t=y>DcyclaMJ?w6KP%=7*xg<`!U
z003(N`Xx}U6%h^jB2E}a$rS>AL;4M5G}Y$113>?8C$4O(KQHQD0*s>uEa8Bz-C*Zm
zFRX}l9)q%^0y?kh5n%AZO0Kl^s8`_kvr`}x-ULo^`y;Iv{3p%8{p{~d+#h-nWaTIa
zsa1PZT-KN`D>eW~DKo0~u~#jA9Zccdjo%u%C%_!$(QE{SaoQb-{cU-;b{EX@IiSN(
z!^u>{GCURg@*j?;{i}aJpyO=rzI{4sS+^K_0FtCW`Ynj;Xd}1Y_~dZB7&mcwaVUIw
z*~`BzfL@4{J@cOieKpPJuwepnnP%&_tY+)}6x{8gGje}zkaF}Ba`>5mK=w^5K$v+X
zVWI)4!B5`u(S|yTQK1^r+<-rr`%^k-`0DBPkv9Bl#;FFWT1Fv9L13b0Ca#ylGeXE#
z_mxD;iB=^TH@&h~==5ks5vg5%08X|pS5HCe-*J$OH*RA$$xRrOpD)av#X~~J=F1H%
zU!5a^X5Qr_15jY$SI~os$l<NOad^S0H_npQK(g4l!-iO{x4=y9$aS&Y)H;g)Wb9%p
z3C;Mt7c9TWkcvg`wdg!p6+50>wKU@hRpsd>fac^1u=EQnq>J=1;aAou00y21l>j%r
z@t3>Yfgr|IjJodXB_+U5@sw(SJ=7?$KU?1>Cp)*O9(#R}3QbM+nF5)Hd`vVrM2tUI
z*MaEdPG=%HXOQHxUkiL#ciBGR{9pER*@%ti-gJ<BB<B3fyHwfP86`xj`$2q|{KE2G
z;r1h5r)DfhCufCEd(<96zc@-?e@|F_)Ox!?#x?P_$g|%hP@|R}|8XfmK)QTnRK6sL
zpV5c~tEML0SB55$8v6od15WLiG1LK8_#2aUz57aUkpsSdrF17gdzDZoS<monUsOy{
z_4qf*?i>1!dwr6pl_S=l&WDFJ&5IUut};ukZ2*4U>=uQMh|ItqDB^fU0C9q%Sp#{F
zlRZvWJ-P?m>B#(1iQ#7afC%)oZrg*1P$(rWNPSu3$%4S1s4}n7{)D912U%-m`N!=?
zbe8G5p!9`$VI2$mSvPCUhVGkw0#8#1B_tWvb+t_7waYOcthUsG*kc8)1?SpJ?g<M6
zFxaj#`{#35XrzS`p7J#X{!LC-;1B8E`U7wktCt~C$-<sZr0|l4-9Tunwta9MGa}^&
z^H<ROEn)*@*Z>%wv-6d0kOq@8%Ler-Lt&{2@Uz_ZYpVP!Y2FrJiPbA7tkB<G4!1TJ
zJATc!Y=V{G&;VIYkbSq64dM5;15D&Wz^`!$_ZCVwDX0>aEOTYKdVp>diVW^xJ~5yu
zG#Gwj`c*;bI4hNK>&v);cx$+(9LBx%Bq1o%Q*Q`cBQWO^I*Hhm8KzgtdFt`IyHuYF
z$h%&HdW-aMfLGtab_r6Opivrv#-Z#!Ar!p*zwhZpFVl<19(DkH{H6W+@P7XHthjOq
z1797ODl!cuV4Au(k3iYI6luMCThDcqi#ond8-ytU=eew@$X7jwFE05vE2DJmzH>EQ
zs*;_7OwVh2?c+3okC9C|h>f4`EX3OSUR_dBUOzMriO{528@v(FPC<}O`eDa|ZjDo2
z&NWdxf~RX~dudWI+XZ}$bufZ``k0fPEC_7l5?CfGiAqWJFG#Qqe~Vr1dxMOD9s6ml
zj8_#X9M4_1FhRkDKG1dL-Sxea=8IqH7B2DjhQ5E6SPzZG99PpcAiP*d!znt+%6;+I
zP|1x4<6R>Wb7Gv-rEsxXZ%#xg{xPTWiqz~v3s;7pN4Q7_b8&jgi%Q>d8+@UowDV=*
z%bgj~YqZNwg72?5zk4gz4COGAK3x<OEJy9Y$67`sc+V0g1V#F_oMWTp`8s947y^)<
z!QeNPS_lB?7GMJ@-kt{&aOXV9Wi4M<s`*BQ*Jrw__*2?SQB@_t12z1t2CUfWvN(oq
zFO8O{TtjJFI6soDEH+&3={MRbr*r7DJeY}!+u}o<&ftp$DkMLx#@)-R5nXdHe2RFU
zwq_T@qSB?>P^phJ#?o}>z-g=z<#$UvlFQHr#V+pgA7amtXkIj>jOrF3_GZYlQV8CB
zyIl69lL#(_sLv7AdBG3CvKeg9?H6Ma3zK#bQ~n+g3tPo-8pOJ|QPzeh;MD*(CHgH=
zJ-1n3I=9F-q3mhGS=`&6Sx@F^GvKtHZoewLxq4k=Rx~rLRDeAUTd@u1?JA;{Q??f}
z4}aUPr2D(HRgQW}wu?;hwNc7(d;;9*8THMdfuD67oXS9wnyFB}H6}FZ4i#sg`*jko
ziTA%G^vhKoqj|Hhu0la>8Q^<deNUzE7Liv!1K9Ofd)(87ovVe2V6^^&W*~>zPWp#x
z5D;_RaR*^}^g}@`1BT`E>t~CZ6tmbKtqDqCp2mk}qn1ivY(c3(q7s+7Ugi8}m%lxB
z7hxHx4<2<h$ed*+MgdfP?Op!6K9qA0!ISs<kr|(n9*Y2iO^N||5BH1ro+#fvLSNv2
z!|N+TPX_by{)yQf>`IW1!jxM~3(|mG>e$115#M=`#0csY9BEgOgX^?IC<{9TlP-(!
z6=9zWTwgL;4WvC1_@uZ%-RT9J8*BMZ+Y4((8qj`&S*3d}If77$!(eCJ5`X*%RG~DY
z9hDH2qvBt6g?yNkfaqxe2E9uN32+y<bHkYo16TQ`en^`1@cTmSx!CoHbb&4q<+8aA
zWrH>Uk>(XPE|vk;kt8B(<}TF7AYkXpa=G_k-#Q^9d6TEu<u`doUwW4x-nI)V6m4w@
zi4Er1=FMUtI0yXxK?!YSR4O&5-cqo~Y!}A>>Y*EO6ch+k1r%w6V>92vGEB*(6Of0l
zNIgHXx34af3q3Qgy}$wfod?2>N36Ino~be!e0x4HeNTOX7<`oMLLViNRBN@TF-03A
zFa_)@$82980lO5S{T@Rx<LnxU+;|*l0c^TtP)@WVoLLDG<%fW~j0?DB18n)o9<ckH
zAPh<JE2WryaBLBnXb`S*ir+;`z)Io_(?L4I8KUxT)R6*LJ9Ftc`g+rja9SbalJe%z
zK|w@j#(QsLq!kkXBW9#6;NF>W_~mnOmAzHFAvZfoc1y_3?#+{5-l&{i1VN@7K=qwb
zN6%XQcnu0sK~A`|ejQJ9*jPIrXOx5^CTjq_($NlLNoI2W{k)}C2$g*IkU4!YS4_H$
z9~!R6K6o9`Fc~6@i-hdGf3#(acZbvx`RZM=!yNoO1kb^KITvhF{{z%9=C6Su3oiS^
z2yRApltaBsyBjz{Bf)EOLy6Rkn_bQ4IVy=>lsE#*c!6X-WcamgE5leVaP>OlU5)0&
z3Sr{!7n}_+2}(I3C${b;++~LwPtRlWXZ#Sweu%(1CYO&|dW=I)c`bI?heOHnX~sR*
z96%S1+hE>^D3=-mSqYL9zms9VdUP<exG%Rrx1Q3Wc(L-CLekv95A~xoHcC+JxtpGA
z;>WavalwjxG;dYk3M4mGh5{Y2D+-N?KoF|pO5(VZJHKWX@kMe&{%?{AErX>|X6k41
zPcm`MieI{`TfbDeV4t|xqeFB!4&(}zHHQ`kkT=D4aD3tccAZ63PPJ*``0($#PqFBB
zzVBvZxLTYQLx~R`^)&u)SG#=O@5VoSF3k-*fhtcQ`jWn07ki3YnZ0gbNvn(^Kxm=>
z*-zkq@;WPQ8h8uq+Q8VvFn}%O&KHWisI!kC_G+vsLJm3StICm~5iTD<7NL*whV2j+
z<3^Zuh$Gq|i?r`Bqx5Oja!meyQ8=oPObyuk%5o<7Rf;N~ej{`!J7;Gxgb{dydfo@`
z^z{=vWNF+p5R>@{f%WD^U(NUA3=y{1I>j0V4K?_hSp>)l@MuXa3u(7HJu_cnb0c5*
z@mag2t}tWhm>{nryy2Ee%NtjTP6bdx8RGNF8RV`j7`g8vKL!;d9r|94XgII4|HoJ_
zf)G@uNCi^H32~7{h_~J&2p}4Q>BpgD%Trwe(Pb}5-RDi_B)qGe?ttJrt-TqGjNMH!
zq|&&B^g#>}yveyqhj{Sq-62K<@CAHB?Zdhobcv8Mzz>t!(=Kbqd(%UG4fV`$la4Cj
z8N&&8_0F@7FR<^Vr`t~FeyOqoJa*TEMkUDjcax33KYy^-C|6u-lrdugKFL}&?15&*
zYYv(R6u<Lw!TA>@mKM%aB~?R>xAC&%g124w%)NT3pzQtrs`d=Gsm?^tiD4kf;qki8
zZn_Z&&2!Wr@UMhm9Zc6Au@{>UBpkGDS#JEOVI0@WT+V9rD4rzJS8uQOaF}F2r1@wi
zwau+I<$mM=jBU6LH?+07_MIokLpFNjaV&$3V;hAwuM(}|RP3bFWU<=*yxM2h2dsb5
zcYQaQABLTwu+b1gH)4~CM_PV6U4mXgb=22fl#k}{o*54B(^gVuTNG)(&Aj_)Xeh&A
zNT6I91GXR2{e{j9@u0KDOt=C_-Lz|;t4!?B5G#g}fdJG;ki@93vCN?Lb8NI29&f8;
z4#O+4aW=b7T1?vV)o2y@om<$C@w1TrC)PIjePOGZXq$J^7J@=%gYc=ZNqwUAOK?ew
zS#dbr#4V&v)4-C}P)zKC*0oMp-~~sY2@=5zSE0E5?z8m04b+Ml(=FzX6LCB^u3s=-
zpU2|4leFY0JfbbZ-!(j{@I}fasVby)<au5YvBRujcSVHeGaRLS5GjHnjCcXQA47%j
zTq~C9HB8DMAq9MTS_6r`zvBEa>|v7L&<;bnh^A+6Y0nwV!Bkq$XX~tIERSXbL7*rd
zJOcgD<4n<wPxJe-dvwhSCo#e31||{12e+bpqPGou6^Un97aHZB|LRy0tk^;GCiZ4F
zfiK71RItQf^hQW9L3p{B==9)CeaESh0n4AiOjATk<n1@NEHy`Qx6IYiC7Vskf{;H6
z{L$h1p6@|vlvCJ+K$p<wK+pI{iPgL@Yk5mjxcN+4eR7pPp}CT=speY9);^*30%fz+
zQR^{_PF4L-;p>A*>x#|hbQO2KjxWF)#YD~w*e?q$4={7vj`v3oC)QY4kyqv!agaQ$
zUXt(r?vQR**1SbP>g+g5hHG&-!Z_;hdHw*mcf7QW`tYj!Jpv<jU18F<*00vlLL@8>
zRk_+e(?76&Z^W76SoX%OAG1xKwmbv+m(|!cdsCM5q{fHj)YvGidBbmtM8YSfgA)kx
z&^#I+Xo^+x6ys+w4z6P}mtk_!U!nd`hfn=J0E5m5<|UEY^PIDY4Q_!!sO)b-)h`u6
zmGukpQEFEFCTy(5|1CY$WsaWEk6VDNp9FBltnZ`*hy+Sv%91B0@QM<HU#m0(0U90!
z%A*lh@xgL=S~tztT4`lr@!h~Y6zN6VfnpLBvHi$ofA;h1A)GKvP(=){F0PO^5LHPh
zmw?Hb!5I*av)YT)f@cTR>NhGr2-*8d^G4L2M>#s*yUp#fbs=R2O}t(!^<1<wA_I;^
zp<D{qsjd%Vfh^6p%6vLzTU8f0_D7#2$GV1_b=EVy!jr&E7>xyNyb!W@x3G|Wn1k_p
zRAi6{eHA}E&T#U&ItEi;S%>6OjI&_~&9i_Uq^^=(q%o$yywBkC3)v}dpXcm7J9m)2
zlo838GX29ZRHXRZ-fJvWp`4~O8&bRYSLjkoy(za{k#ykg^S<g1S3*&QoI47{&i*3q
zH;D-C^HarrAF(XLS>a$egzan{_26~<8V~uWL=G;sDPSCs_5^}A+6C5_A$SAEv>;~*
zb4a}u?8Pc^9v%&-*c=}z!vg~2gWk)<#W4lo3solnt??(~SM|1HyPBEC05_RHThI3N
zAf9$rli>Nf1^HgfZP#J(iIDiJ4qdxGm&W9WV#CP?vxcl2CcU-m!=}0eYW8zsgTqV3
z)00vB)#Z9gdE{(HO@zMt774QLI@?%|+aU>~O6J^^y_2<OwNf8&wT5>`zG9<uwNokA
zMoQ&U$ktDPI+MM+acMd~S<7RWU%9w%XWdIuE#_Z;u(@!|ZF%u(Vp4pk4>i0#UKv|7
zTIyG{_cImDOW%W=oo)W_Y1A_OWijMwt1P=7)$HvuBoPGpq|<guG2Xt#2`iFZd+_Rp
zvnX9g5Q<UgNRoW@wYEY%1zb3P5Z6G!O$skRXfjMyMtK0ZL%uP45cy6bSx|WdCcjq(
zlDD3gA>gJ&X<@_LbLN;BKGa+vo$$!u3RzgOaRiAnkSipERAx3j28F`z6L#DD>5YHD
z_#;QTg(c}VdF!kD7AOmBe?C~ekI3R@yrtkbv^?R7oUJ#wW?}Um+K7;!q+w(1gg`S2
z6BWR3X5w7BaV(2Lx>^TCtoh3*u7_NImdD?oUoxmg=YkdB`x(bTl*}&edc-SCvf(?7
z^?nj&Y}Y=Zsx*XV$x@=CUku+gr&5xnH!*Re<dwE;gawcYvOPH|e5xj%&0(5@jBd+m
zEl-i6UUtVI&0@mMmvDKAh+GzvV4RD5ND#BU^ci!DdRQXwoV0Bkz=M}fi+p&ovol#T
zQO(al{Ao>i;NXjAt-Ms;zwErPi|5^vHe|?O(WhGV&XY&l<>iuM=r>dm0gp(XJN%cz
zDWzgf%6c4wc){n64NXo8yVJoV&AnBn)koepnZMSV`8L#a5?4IiPYjv)9#2;AN1a3g
zQ0+his(m5=H@(3m+Rfp{MDwz^Sk@D&RedaZDQC_~tlQqm(<xgY#A9W~8kVq0H+f=j
zYU1ixyzHxG;HV=NAfA`p<Y6$eKJ*FqxMm`7e*=>=#&MNYWfQoVX`z1Nm{v}@=7!oA
z)dU$%%*X6-_XIRuPdX$2x%D(~^9DIjS^V#~qwPMSnOvPoOf>$XnfY%LR^|fB!`<Q{
z%c&*VfpQZNz+vosPO;Ti+eqj@F2FWCGE43*CFweoeMq&^CgR@IrE`BvjRp5mfDxur
z7s~WS)a>hC`V}@wLwMCJs+>+Hh)GbWgV5Rc2_2%O!7oo_?uXPT20m02SA=;Pu5`!Y
zDxIz-T{Ft$eydc21egp(p8$~BU)iy;9<`siHZDdM+590-iT@!oUA#oL?WXiMJKARk
zi}=r;Fc6uNC||9Mbup$*j%=!BQJUARr7IF8Qg^m{KJEU;0`o0OY*?}q@0Anf>UiOU
zrO1w2RkDaQ98#8t{5w%>yTTb9vjp<uOS-_BH>)M!Oa7$r7*{uIL{di<>?^T6nxVg}
zq0N;k<`8oio8>W<U9h1jhTSNJUBtsF9*5hjHlfqrTw?Xi=GUFSrxT^q?m2lZ)x*e?
zQc>2^=BRD$D$(hQg}@g%Cy#Q#pufsE1xUE+BLH=OTr5BXR%Rg_s6ox<vs<f+)Td;t
zKP^8jpVFnWK14HBrpTL#_hf6xj~?$046SLk-@4F#eNPk@O*fNlQWBbAkW4;QdgxNK
ze@k;uC$q_Nu>8Q5LsE6xK9Q&57Vuj!=AV^7yH1c)Cj@l|g_OEFEUU3(S5+6CkD4Ub
zy$lFzHDB#FUM2N%nl@H(b1`r52>0t~kI^QYs=lOnz2ndSZa&94XM&4`_)e#!CX4NE
zp;d@VJxkW_B^zF~ymjRhMUi?VXT}fLLRRkJUGI<(qel><VA#1!uQO+U-0G8;%0Py!
zFd{O`r(O<#lpa1)1Od@z33kpr*v+9PRoYd6K^)1AAh>Zi#;J}>L8wuZ{p&ju0lWg*
zgos1!CLSc~JiR;5pjP+L2b}*|?`xtxc#H$%|GYN508xsW&sB$dkPSMA02iP%k7+D)
zOp-SG*YErb3^lXisU@>I%OzzA-YW?1W-Ck5A&2S$@{tzGU6Jp#mO7f7kXg?}Q{vX5
z-jlb8`x~%V+U<=b^-}SnS+jg>QkxBqQ=R41wFz(A@!}VZFXZCXv<Ad4!4h8J%K6*0
zj+X54Lqf#9yjBee;T4=1c~p}M{-H`u{@3POD?P5hV<zx^{teD;`Pz*7aqZTF0_0@*
zX;5zWFrPru&RFA?NA<G{-lbyq;OP?IiZzE|G_S%}`0)yueI$yWA#9{4<YQB`hk8{y
zzz5oEm7&A9(gL&QddT?;C|mi9upsn=L9ruooMz*R&c2*up!pQpkT##WPO6%WWxaxq
z%Y*l!s0!<QtffJdw77%z{72uhE*@jHQoY}#b%fn8Euc92RU74+TR1Vg#H#s<(etrs
zyvnQ0=Hep$(m79Z&I5Lvjt_MXlbX1ek4Cm76him8HXPpi>rHMf+%>Jw<vW(BH!Un(
zUufIiqO!CIT{r)!z7>u`N?cn&?#UWwrIk63tvfp8$hG-xU9#CKX<zSCMDk35Y+sY`
z@8d6dbunGhHEy6?2ASMs1dK|7CeOp30;#mdwJe6HHJ$0zvN8JD&c^R9TQ<48EZ9vy
zF7jgJru&)z(I~Ea9w@R3QJYhBBT+#jeS{qgIozbB5_Ng(%hlh;4IO{AFm7E5yr7ob
z1zQ06jbbZ-<5$uxqM@ou{o52wee<htbo(PjWvA+rlV0+-CXaqA0v4ewhGVLy1L6~n
zo4R2?#jp{hLbmqHqR2_HiK<FV?P$u7CKht;q1@);fx=k9N_SaDzKwntm%(20&8;!u
zObMTmYH#cqYM>*I<sRuoFY9bJT22g=nJ2Z*t4;t9zOH1yiGE6tzSEdb=jNV*wcb-d
z>1hcf`P^!(rzS^A=l3R5iy!0w!vzc*AR{@=I3ANB{{HnuJ@qdhxQno(b$Urtefqsv
z-cgIz(fL{0DJ8XhU@xcR0p66>NVTg(cPSm3dvD)Z{p~Hu>;b&({?C4Ogj=6%FAYk5
z^laLTa(eI`7Ic^cF{`NT2nZoDd=F0Z_n6kGKn>2)G=z!c4Q{0!X=;wmRvR0>x*y>&
z>bWjlo>JF~d<l(aFMl_x*%zFe)T)vblvKImGJdtaG?_2iHF(W(AMKgpO3<i2BaZ1Y
zFYgvILa-mTrV+GR??jCavnQHgR+U=|ctd6g5+Vm}_U#M&)gmlRRR?R1j-iyPK5_JK
z|0Yt;)cHdg#Zslw1mD4jph96_qj`5(fjJnmu}x0O%FNm9+1hi)`y`Wl{#Nl8`9kXq
zv7Y?zeJk>jkFO6e9&;pXJwF0zd59%)NTF30(n~U2U4znK9LuJaCyt-FvxgLpVj+cz
zFqHh$5(S*gOHv0*^n{^2-9ePzDGGlg6KMQIj1IVw^^N6P=8)5>QI$!{=BLS$Wd4w*
zdf|s_o-8hnWe>F8L_SD;ihdiFQ_R_pJv>7xB);b+>=?=K`FJ7h9kNSB)cd5Iv1Fh_
z;+f9N#G3rg%O8%JL4nls@`<195|?xfLq3#$9?8reY-3>!skx=Y>FIw{HuIMldOYUD
z={Stnp)u&%#_Zbfdhbkk&Z&HA`Esvfp{-(;Zyw`kUz_Nq&?j7z)N}>M*DD-ssvLT4
zxm+h|QT*%#R&UPf$wml2b$|Rd9$8xOn7#2YRvQdDmLe;Vw07uSiy0eS+Ey_&%ocfv
zf1}YXq#FNbiXlkZRUsV}>tTXz`T5<|k7a7uI8S@QXCBQpHjL@yfpI^su^(jB=34sP
z$bOxzwHd$bxfwhnew*8PLF0$}pEiA}$atOAiyjW`0q7_*h0wvZ@uUVm%k6N0aQzZf
zJK7{h>8R`~H(~PYj%ONiesT1YNG*gNEV1g&DO6F{zTj|tqGpM98|BnmX=<~loLt?{
zt?fX^vP7mje0VVR)WG7q&J2a_{$NRk#m6XX$)d3zR`$0n&X+J~^{Cg@j&zr{?9aw=
z-H*dsrp$%j8VTklp87=j?^VS{MY*DPtX9~s&4rjxwYRH&NcGY2jaa<_nid18Ql;Oy
z)PY6y0xT$9kg+ELq13$%AWmZi3fDg|E(}sHJ0`hJanKj*T;&%mTvcI&FDP`0EpPG1
zx9h8&PsgvQ7<LglkXla;>QZq{jFfJUWTz|0E~+}q$4Lg$?h82{-ouTJngzy+<Ds_p
z>!LCnZ1PPv^hr;I(OQK3)8bQ8%WEsir>jJI1rA2)^7FimH%4qC+<2_Ux`k}H-0o@t
zQ_Ja;M-8Q}MuQamE&IeNL6I~qI;}sGXL?I_<XK{Q8vF~D{}T&PJZekb85{K<LCxR8
zo5CSeh~3xsIm59DP;vo+An%F$_j;gyJyyX6ay}_Yd-T9bUFVN<9ig~_alfL5_lNio
zc)CNQ>~n@}&hZ|H)vWfCsSXFHSyuk`>{bcish`Q3cb8RY=C>Rp3v*?KLy`ijf=lUa
zLrF4@fp13P>E(VGK)FVz>z~`5zC9C|Sgy*xC1<(3<*9#okHw`#VhpBtYA0XUyp<@8
zW1)9w@jx7}{LU)ks-7KUTT5M-=RRiUJhr`uiVh(NAB*g_2jdnG#icMu)`x*{n}+9g
z=WuUR<zU`w)hEoHG_CR-JSsS!6c38y_n{l;Rh&*IM4^EYcHnhyN|6{NJFd<jIkwj|
z8pZfVxse>M&5)^y(Zk}{$x!)j;3q`!pels_OY*o)5@#>gR*}vE*>32;i&TeVk||MF
zlZA(9{h-%P+>@3p;`SWs2BCQkCLp@UhjQN8YI(SLX%aCpoy<**nAWwoKNv@!yk$g_
zxgggYx7wbaqk-Q}%CUWQoGmb^T|Oj6PPsQJ@bd!)Yqf%LY#?whE#^XkZT$$0QYAb1
zlnsm(G2*Vu7d>va#LOS*U;nyu!N6Y^HS)wOWG^^#I(g-j#G6Gs?Rv?E1h({4I-PpV
z#P7s(o|K`K*KefEOjm^LyPLmn<aoWsKC?umabb40z*-T<Ul$XyJzgB!^;Il)oRUS5
zzEVuP@k5t3?JBj7*UILk1g{c*gY0$kr9dH-Bi?GIyoB$&;#(K_+<{^<i-C&PoK8to
z^G<KfCi}$hoz(NBhpbKX4WZ-BYTOUEKeQZ%yHnS>aMA8BksNwuiYJ>@U#=6z-fd>s
zIvzIH?Yh_f!jR~L$Iy4#3<(*0@zVU+-(|9&(tU|Cs3-k>sdFEdzAmor@v?q6q42Rc
zdCflHBCwa12x9RmT~^Y$ZGL`Mwa#A$T?6&bI}xKp?z7N&$0@WG_0FDk#UxdB1Z|JW
z85Lv59vP|cNPBj@E{9uv0h{{w(M+12o9LN+OumfYP4Nw%S!|VT9@NjY-}=P+rJghY
zMe!Z}QSs4;ax&K`N!OYxd0exj#?G)U$A^}+6UG87W=nnYZl00*YpNqsnGC*h(dvk0
zp$qfk<eyEB1F_$`%tqap4h*P+6U`mNKA5`J=Lss?qw%TCU288(FwqE&BNq%flM*<!
zGVtxVSiqTXT#K0+%xOl2Y{zbu^wZ*czv@A81k#tBD?FV?Cx?&sVSVJ9>NH;*l6|VN
zIhmiLb0U7+|KQ9>AmwO%`eU`=KtT-U0<z!Ls)fwbdb6)>*oJ%+cwJ8bcSqu&%63PK
zRe9S2gHN@H9^fL(GCMC}-qxxsm7FjewZzH@C!G3aP$PXgn*GUUZn6tqU2UJNdfGJ<
zKh|McY>L?dFy5aysxxyJfz%W7Hm3ZH#REU;h3YF!_8f0j)}W{SF8Sh?2sMuscdN+B
zeM@-EUQDpqxmkzMU1>3g1q`-gQS?s!yc|Q5J|QnS51SXR^P99~z71EV3kVJOQI=v*
ziuixn`|^LNyZ8SRrO~1yl`Pd=S+cYsWJytBY-8+eCE2r$><vjbSz1H}Lo%4L#@NO#
zgph`rv5X}<W5~V@-!pZ0zk7eakMBS5eSCiDao^)SUgtX3xz2U2>v=7w`K9ytb9$PV
zxTi-dM~KRuH-}YAJ<q64oKxL<>dfJWG__RBCVF=2+QPu)v5&Uk;8v{oy(Moi&3?JI
z;Z0hwXqWb_3~<PvcP7h2GduJXvHjmE;o@?nCDU;0al?B((10d43sz2L!7{e_^=>sK
zOMJ~!Mfb<`wmFQk>|2I3c6YRSp|1l};wEY;<bh3j#JNSO8BW4Cj&F<ZbGv2*4orJ*
zKgqzBw|nsyu7>q1g?Vp3S}W&y!?snIiWyLxG9SO`vY&9I;+}`1li5WYv4UQ5xuTtK
zJ+yey>CK9}Uz8VgX4T{yDinvU5-7*Y`fHU858s@oWxJ$T>!SBhy%lxts<1CxsE82b
zUy?4GfLCcwO`gkMS|5f}(zf?>R0rfgGxw<CnNVN}x&EX&NAks`mFLQ*xIa&a_E=Fi
zr(_wpAK#stiau@38K-LMiqicd23IlBn()Ve_lIgj#bmp29)v1ywiSuy^(XSqOA^ky
zHrYe(-6w<H%w1AOZ<*-ND@yX<XIIin4mP`2G)pqVKVm*kSfcSb6W7_gB^q>(W88|w
zchT}mk+PhlhIH^k;xe8`a;^0st^7-~7P_>x$hUD~*Tz(fL%i2iXSEh`kZPga)yYwW
z@b%xQCZJRL_e|Ti%k{LaC<;#BDA<CV!=L(AZ0C&o>yR$~h$(MYnzfwrWE7Oo%}dYj
z!qZLRE?9`Zl*ypn#U5ELnEjQI2Z@IHV|x%&xmAN62qYEN&lNwjM~}8%PoPTSgUxwo
z>$cS-{1#Cv4Kmlz^08;jB<yxhB~~#M4!pxxS8H9*YPbn!!`d$@QM9LZ>e;-y;b(U*
zY~DxgLz-JGJ{DtP7vv<cq-;8-2GWR14>Nr%lyY+y0>-Fx#=aGlVifC^*-}-`y<%^r
z;3aomrCbeeh3|P&TQAg<8+Y@*nxtHvEqH~g#oEQ=6st1qyu)6Q&_pucxW3po;NZHP
zthe53cft{|NLA)bv7D3;m6z^<Pn2&$p3dKJdLT2Q4fCIxW-PxiCcGnQdfy*aB((Pa
z4UI9&eLP)0VHv$7Q4y8Z=g>lcdu@wk3uoJsFWB7j&g^Bg$RpQoe5Gl3Kc4Bsl1kk6
zqK4VLoyx!77SKEIJDyZVH{_vY+Ciq8Zt-|+H)jt{o6O!Q+Ui<e+J-{+Zwlh7>DsgJ
zAtU@-mi6(N68m%+cqHE>qo7QL_P`Tszf`f1*&hG>TL;IOzMTP;<9p%lC0mO}OUWxf
zY>y#X0h;SXXn{WkC!#O8Jil(~RB0$yT*Gl3ZMhM?L3V!EH$1a?l=AHH_Q(E$0OMl!
zgm@Nt|0hb(^Rf7B*AM0+-hQ)e-tXX)$z5yX3fNv{7blc)zo%#77+k~1wEa{bl?LCc
zsFLaqiM`a>iCQV&p3#Q4J=A1WYgLsee2=(p)rAf$uR<jr{nj=a)KycdyP76wBpi8T
z$4*vAtnyXGUM}&D!P&Ry)*4gSzgXCxsy(R|zY>|iwv+X=j-Z)<J^oqmdWCR*ndcre
zc8qG&Enn{Ys^MJuvE|WFhnh_y&IdOYqGqz8nnLF7_fUt*c3d%lPT$ttYGFe_@(nBv
z>Y!|0@#5>`nS%s=)VVxjVS8h_x=i2+T7&={QKM%7yIbr#WWwe$BLxlA<sQ&<X)M0f
zUuEI4P#I9p^G!H!dKSv)bhY$_-EN=9poENIOZ}|50!Y~1s=3;ae@R#oJ&sB(86p!g
zNe?69krq0bqHa{}x1NIA1N!?7TRAq{)-2Od0u<pV;DC{#8{3bF&X+i7T+&;Orbp&h
zW48`$>esB6c`?3VjyQ;+_LSpk%`snl+_$Tw%?*%=+X^8#|B8HHuj%bv|MA(6u^lEs
zDL%Y&8rL)al0C>g4yG(a>yAfE?Nds51EfP6UKRj}onv7|{9@<LcgiJ-or=To1blYD
z2b5gqlbL-IAIhj4Ib2UFW<nm1`^4CLVw~~5<x56equCX2D>L1kU(3Y6kcrY3#{vyT
zub}vaE)bvX4G1w;a=Famlr$LXy({*wp0}2;kNatg%cK{?cu8uxD$4dTlKL9A><#xh
ztGA2LTdug>i|Z5}%08Z&tgg{}|0tybHHV_&H)oqm!9m~N*bucZ^R(4BPKOW{p~Po+
z{s_z=+!~Ua)~|MO{^e_xJJ}Jjpp{|#t0|{W`-%!XtjUHp@g6<^s>XX)^Py?CY5MqW
z9{&FC+4AWZo3U%$h*|HQiR+Rr9Qsm>6-^gShTS!j747M5MlXlDRHCfXi7Kv<P?7PQ
zK<?gK3Ywvv-dl<+dR4cNeMPycv2uPkc~TvDo^>bdT?3}nF{lQhU6lVT>QMH(u|0Rz
zdu;pPe=#55cI*+lj(kT7UHill8RpIXxJT9r$rB*8Oe%s|rE_X<KZMljC#Dy5_lm){
zXZ9erFh^wf)0~%CMB$Bg)OYbVVWsNTDg9D#>-0pj;zH$0j10HOgspGiX(E=^av_f>
zUb3>~f_1R6N{@!tpyNsDL=WmBRGupGXo38SV;{{U3&W6J#LS9FY6?*7ps3u6u*w=V
z4KJM3WR<&ZUcR&x@9VIT=F;P#0t-VgV??9oL=FerP0A$B&EDchU|;FO>XS(0Ef@aU
zHzwE3S0};Nz4wPdxO9^rL7q+*K!-Y8*K^jozrc_?ScZ-Y=5Z8XCEwVA%-j2P`bIYC
z7O}_E9vh8XsT&}k$Gh8kyPOz%C?V<7X-d{|c;7l{)*9m3?!OV<`}z8i_I4D4Ap9Oe
zD-@4M=yY+BZObz=63eH`i~1KktEFg{AT(*ld?@Y;k#0X(C!B4ao|%KaO}q#>f=8G)
z%F5HcUG|u=A>#W>J8^!d6Kt@16%mro^VnUU)ck%AbLf!yIDE~H7#`^B508%3wUVjF
zVNAcK#b;ADLMw(tqOq4wj%N?Y24JzIYNCnf`SJwBet9@0Gc&NDBH#fR#jY-_u`T0%
z#8&Yz{g#F0+sBL(i&geY+e3PJD-p$+&!pR<s|FC>u&9A~?DqyAWUpK<T6;H51b6S-
z9>jWlL)PCih=Au<;i}>!y?%S`JD-bf-&sQDBi<f4^JE8$t{7)dyR>e^8|}n#TX)48
zHxnRY`ar}&cl{Kx1fbYbu6s(9$hV=^r;7jX_xKXo@1Y6L&0ZbQ*9<X&)RiAkb?(VT
zc*Cv8v!7DUfOOsFzQ@p3Y(C))_kz$Pf3(SWaI9Kb#NNkhEqHy%9(CH6vR|z5S)z(i
z$TP@6yzZya-q)~=6i(=P2R)yRUR?1k+?|B+vtOV2F&bXVrwx}vr0@ue2${~p8)Fuv
zKV^(1&I(*EDvjXl@!pw4>LrwD9WS;R;B?tzEHzV!Z57u7j4*jjRF#+`eI}|MEOV|4
zJDWASjle9)kGMQefmDh0>%&8y-|2sLqz6iJ>G}NLEBK>04K^`e+*py96X`X;<BRF>
zQz2^wR>_gjMe$ohTWuVZ9=QWP)>GsCT1AkHyHIr97;lC0xkugFf!b*J+2LOEt^Sf5
z4*NuC4m9Z5wuk<eIu-B06{7ztVb4xh&gWvtM*&5Di<OoS>{mi9IDbA4>_=Om{$v-v
zKLme1jl9j!!A<2JsSwndCfO9WOuV)bk|N{2nHHVt7V%DbgZ;c)OnVVw4Q)2CJY`^k
z5Qh&M_Guj9{*WomwYV*xJH9NRyRn_+yWh9fG*+7z&UF)ZwX_|zj0)J(OIv?mnM>2H
zgDZ(Fe#vc+U|T!e;(bVY2Xf-*^UikZj_ea=vuoWa6OP<E^+b7sm7S(y+w>05SIhc-
z>BdZ!7Tnx3QCe4L357r@kb8AK35I8RM4>2+eRCrFwNXFrO+7d;R4xNQy0sEsk!gKD
zkBmSe9uwW*o^j|1AZoB!cc!Ryh;n8FZDzQ+?c3HqW^_!Iz7sidSTHX<=g0eEcERXf
z$cKOhBE{hqIvGDY>YWM<L(=)~%J^(6k>XVdlf19y^$#6$e$7!XTo-SVKy@zgp5Xzy
z5*|%vem!59`SrWjS5~`Ea-@D%5acum-0hIESUkFu)lINEwCALi>%*IS!Hslt|DMF_
zK)s$;d(tk=Cd0MqD1Oy9=Y_I0w_u)V)|qzyGdk?7t2(CayJgi}URpx#dRUc;G41^*
z7QdpVGgkZ-fc|TPKlH#+iP?cP&!Id%#KL|vc!z*}s`Z<RdulA3de!bgF@Z(B+<|t2
z&`Sqx-D|qtZCD{VFy5Nao6d_=VbY```T3$x_@*9b>aG^2Q(zh1T!RHr{+$bRZUNHc
zJELMxKT*yB8{$^q5Do}90m$OrpL$c^^5MZ)mLv{9)DGke)>TK?qjokW6h~W<=ouX&
zHg+0h`m_50I~U7N#m=Ka*CRVjM`DAH@fkUD08#v$3TT>Vj|+`mD3Cq)xSR{{=d7|-
ztf;ibQ?N@+vK}5gyB_y)M-|tcnIIsF{r+Fowx}Rq@zrj#N6Ti%k+mYqR)$8mk~Y+m
zIoan@ia7VVz34w(#r0{T+Rc<5T<rX8tB$X4RbMMlK(~5V52pc6i0)(|nC1Tfk4ik>
z4aicf6Y6LLq^CU@X8}Yh3!d2tOsqcln)b1vg7Ic{E(0*WR&Kseh9v|*iTDt!=zEJt
z*U|*l_Pp&?0lPOZ1ZcX(GBz~2>Dac5Ujo>|W_vj)qhS#|K#H#bvZPO_{jK=muWAXl
zYdnCc9W2N9&UIr3lQ;sTc@h*1)spidoD^=-%kj!XLaZ!)8Dgx#CsT_*OR!y(%j-MJ
z+`U_XrjbLE2PJ@R<ez&i%)&mR4$MA7$8vUXJ6~58un%ps<lS$c7R7$@oMa*F2Jg50
zQ<r|u#r}c$@xD61sQ<yTpS0_IxP3_8V?u|&y|x9M#Gf260xPykQwcnYG@pRVo&jtZ
zQ?_b667*A${=+`zXgBbeSckG}RngVr?N1Y!zX*X}Zb%-u#r)Q)Dbq;U$^7aL<QyO*
zge*U2&x&BPZI3x5lm!k(i04SD%P*N`MR>E0jQ%2eL<cM{)aul=glh4paS4!}tU=&}
za)kXI48F9J^&1B#<%i}^kr3px0OZd3?6HWb&KabYUSbJ>179*P^4dxSAb3O<d?@Nk
z`N>~2oCGwCa$a~EdV~_f`5Fc`NCyyc{h});VDl4YpPel7Nqd>p?qsbMRkq@>n61Vg
zqHzC@T>^`_#{AHs=vd~F2OD$)&~P#+WbP3Qf%C4L2rDN$bAN=+9bifb=WPkVzso-l
z;uGb^;8@g7^F5YgA^cAP`?=FBgljDGZog=76H2ajp4i08`f-Pbs#`rwe<uCmUY$;2
z<N|Z;#(;(&PkwUY4{`WO{I7pL8vr6qPapWXub*#&z4#^LOqzcECIIN(bRErNF8&K(
zYW*gsd+@I<_{CivWxyXc)^;1FntsRwwB+)k58Ho^;LHRgUdmlsWS;5gkH81n1q91N
znQZyV8}N@Vla=@zw;Gur31VtDqrjZVKS$&<M-c9{Br<i+8aUOlx#bDJ_Us>8A^AXK
zo#OX%<^w-`4hR49kAM4n62QFD9Y{XyJwJEyKg#Og#e%sV0q;Ma{l0^_e4ww`X^+MK
zU>Epp^N?U(i-2KU7$ELbIhf?j!HY7_|D=@dFL08S=gfdM=X{O;t9>YNeLd>W5ibGx
zJeg;1R52%E2Q1@Y>;Mhpv<=?h+>6uC)SJ+5JAO!6@B<5YOgk8!saLNaE6(?A!R^vN
zo1)BQ`L#Vi*;jh_vxAk_PzX;CXjEncUQy2jZ%PZ0LP2?0LsH<Dw>;>C=U5ZO1~RqL
z-AY?O4tn*0=c;W$)s$O28I-DZL@I4PUcskn#G2p?vikx9=bO$igQ~0EsV-bL=m|wC
zu+??|P5E-@vN96upwx+38dh{f>uOD;>{y(pw(DT^POmTS?FzOa>D}2`JZ3ic-DTTI
zX8BPOs1wnj-YlOGPEWWC8l&OCE6bzfHuFn;@?P{pm&&yUEhUDBC>*O_X@N`D+5%6J
zo0PhZ_AT&gPJ>dJY;yt$RJ0xbGGwg;3VEbR8M<dsmu$k8=v4Wob)~J<RQ$t^+9GG_
z)E3<b<ZaWytE|hNFzt|vC^OD~Wa6$RkniQjIC-X%GOsF_i!_fFH+Q7>mFBo?tjrdt
zM#_oc(V%v~BSw^2qz779)-dj7=_G?bkMuY|fA?TbFtLve*6a*=8=*Ghf!`^_>?PMn
z2AyWCrb*fE+u4JYn+xsjJh6-C;az>87o!8SP}m$anR{ok@bVVFYon0S8>hddL%1L&
z<XlS^&@8cKMZ7`9O2&Oma~f1%`Ga~mgJNf@pN!~p4xtPfdrqEg97b{rRN{qGs<mdX
zGlG=|eG!e3d?XB%cg5?oXm0u4SaZZXjvOYVhLi+ZhpR9CK26F7Oz9B#M>^PTtkNO!
z@SZiNt<5!N?ca*hWeAPSPiV(pN5IqqKx(aj(?J>+gxL*i8RUBtrsVuB8#C>v7uuED
zyZpx&Rclxg*c{jr_4CU!Goblk%jDTw#5JUOTjpNc0h#ckYVnH~J9_XA<3Xj1aIbZF
zjOM#b%Tu`AvCW}D?tiH|%A|__H&v=u#r_+J4KSU#R12s})Z0k0oPfiDk2ZaS&49vW
z5wrNm6mK6R<=jW9Xh0vB4%9}Px4ae0_FJ3BOnPA}7f_bnE1)37^vC!5f{GOd-(?D)
zSKkn(8U!+3oUz&1A$>*~X4^nDGT`M(FL<b)hIIi|JoZ4E-Mf&G<IG;01d!-;kbn_E
zT!DM7(5cRhGIIi=lss7m{uo~YK4ehv0Lm;WVt|%9UzW>`oYyJ@ef_$lPu-w_S!5Wq
zF`|ZLoQstk^n#(fP=ZJu7LF^Mo$jeATtu29q=kirmqBJ&&q&j2DC`CSF*DV0BSzi+
z#CfQ%fhCtwlK*n|axTct#QR93b0a`_rhp3061`uFQ3$%Qwkda^K*tGYJ>Zx1<rxS6
z^+i{5Y)hdXF(EVvzN9)p=DHPO4C2Yuf?>l3JD-9EgLbS+LXlH%*}_KGvwMw^p4ano
zHb6(jZe|r=!Gl&exN&?NQ{H;t*(TY^X$@oTQr63YdO)jDskl>l%%V(%jk0BCncli9
zDAgcTn{s1B4YKl>RX|C8^G%($`VQHXNnB5H@o<J7KAkQh&ja1rik+18AC8boa0Nn2
zT|`7|JU?`X7Q$okXeH-~*)V9<7+V5MJ4ox4@Sfy%E}J~p(q_CHJKyQ+x;W8+*;p`N
z(!@<NgoZ(NQZ>A0vcdxtTJEtqX=I%qrpHJ{ChdW9POY&!mbEb}1hLf%xqO|fSAk{o
z&m{#AuE<*eX#mEVAxjE7?eO8leG-$9m&-G~<D{DT^tuBluYLuUoC-Xwth*7AX|6BA
zLb`$&jG$W0>mUh+UXI}H7?1dGm_e;+-Av)z8L;$7!rP#5T6`qgTiU5-M1?yf6vCmO
zIj-%hznZKgnD<>fB|cKu6Ri>hIvBKsh(N-)3O~54SJE~3?R$%y3P3;HlI^!qL3`Ow
zKt6EIKZM*hAj6YW^S*!6knkMW`I?^Bf&Q4WLv;YQ6dS~Du1!P~I3ST!P{tdZ9Z?&b
zjt=Od27&^_gb?hTBFFB#XEojy+IL|~tJUElCswWp?=F2*pBwPyj#c{L3aHHUxaSM1
zfXi&}w1+mBjuTDp#q(!xz$vLZw`)lCJeIVRY7r-Fiu>Hcef#MZD;C9Tp|!G7Y_-_;
zuily2<OYa7CKjYf>QA4aI^J23fdR(k3{X7|N6u;Hfxf5J5os2=<Nlzkc^MSwmPP9@
zbZ_Gv9+0;{bunRD<YXDB&T|4skiG^ZOuP^8TgZvdR|$atS35q*I-7vmfJtKKwA#~E
z^h0`#OYh4l_)c|Zir;sl<fX`0)mkzF*`CeE=Cf$Ac?X<zva%XC6ckZ@qndxW{-fve
z{woIY{JBeFOIsbXi%BcH9w})1ob%Yv_F7KsXt$J79<czYY#4~Xe`UYrct?R}k6rq^
zlQ|{d!)RKqEB_d)JCMoXP7lI-SN?gYD<gFP1fq_y#4d&B*GdP<(PAwF>_F$BI%@!~
zyHrjFoU*L+^R~Z@v8;thqz~ios19O|1faPQlN;FGdR8f%qJ*(fi}eu8n49(xgTl(B
zS|Rw#eVzoi%V!NxnkQX99ddTx4@iSau7S-i%@v-|d%}Ip=l)Ty-MbM;_;tv-{*Moe
z!WW3sQeE2Iel?CBhmKK%Kg{2#df~`y{}3J?uHpGjLCCRF?kOaXYea}uw>m8!2K$Op
z)*p&D(PAOo67cV23r60Xb$K;3fUX*gV-aDCgX9;U`qXhtV20%!8x#_-8FquM0n)F3
zDnp8s`<d`3q9+SFWP5X5Nj!RCm%M_ENwJm~<Bd@SGRVrwF<C^JEdX+9=^8fd5?CBO
z;1N!w3*7(m+KDh|j31;BzxzXwE;T9%2H(oeoYKCYCVgLbE-pp`!RT1oMb0BO>?@1f
zxodX`r~V9R{p?28&A@Vk9+Np$TRw(~m}>3WG*D$ww&ICst3)<v1$t#PnZgD<1GZ)7
z$tG`#ujw0Sj4Kd8kOSE$JVJ_(o8RXMdCnavat$7>&P26;o^lo7`SFO{X$uCH6xSu@
zE<$3tzGSG}A(T$Vde87@T9z+``>sRe*%0>eSd7T<u5O;+xd5l&dUopXgD${1fDY2H
z|6qR(+63cO3h?_18`if=6HbIoa!sv58C{)lXvIENt8`zTr9>4D<}TL1RANKS_E#9>
z-&N&-2%Y>)FAJcof=0+?Q|F~ZxSN`q!1=tiQpNrFQ)43mA{1D5*C5IjppA*91s`=i
zUpJ@>MT=BjvM)QoM9ekPmx?!WW|0rH$Ee6u2@C37J$CFXc3`VeEdp)%V*)Us>)A&+
zP<%@*NY&9yiC0mtTGu6a6q>MI_dzhK#UGV<u2*XjUAX5aOW&3T>B&xdLomWw->zs@
zspwn3!5|!t9A!)1x|}1aX`Li8r4PLd`lA>|q}$hoaPX8#Y7=c45oVX%_hPh>$V9BQ
zr;-oa{+4x>U0IIEw*H;Wal`ha0Ty(}#d{6+r!VYMD2RVHB?FZr``!>gVF+D;6ARLR
z=o(GVo=&7Hu<b_p1T0laJ%F$<AkEe8LYt%kxZK-kzwO+oO!wv=NJ;2EOUgdl6{^02
zX6C#G>@>HZ(38&>@?c%BF|7Td^|$y3;tz<R{>~%ED0Y`a`zw42SUEOXh=qt@&dJ5w
zne(@-(JTj{T?;4{2>eh5=tOXQx2=h&R*V|tOaU$rR+5V4TWYs-Xg^5XqzZYOvmGiU
zXR#2fr`N$EKW{#{gG^)*3X!q*-b?EwXE|PJg>%1-Kbo~v|4nAQ$4x&k6><@3tS@B>
zHin84*}ZF*{C2#X?!#1!XV)g@A}Hl{BcQNr;3H=P?Xi{$X@Z*E(ece#D|n5a74b5T
zQ@uT<wy;`II2C%H>$d1uy-~EtdG3C@FThqo%@kzjsmH`^aXA863Q6g|VGl_Ift>3F
zET+)Fm;zGxo7{d5B+~qWcsJs+yr3iyxU2?hZvTSitacP>AxGVi4T06~T*&%rbT4Iv
z0^0TtI3^N#Cr*F^vb=U3L&C$JONg?)u}7|;y8M|$dMueXKz4N93L%;7xtu4=-f0pT
zAmu3e;3V_l{B*ePp9kJkBIm$Q2mG^+;2RKXVY9amgr5{1{MxC=cJ&=NTl|IIND;d$
zd{82{R2nIl{v%x#3(>B!a)_q_J3VRmrM^&Fb8<B~9eo&=tX7q=$KaZ<Aj}3PiA<DB
zgUDyYVguA2fi@4l6<`%h^<0q8gAKlki69+;Eg1}Q=|Ocn8(6V=deP~Vxe#f11qOq?
z&L&&Bmp1YP^cVkN9Hl*O;F*Xy>JWT5#0=Y@KZwJMOF>gRFOPsjf_k^g9dcsN(jAz_
zF5Dq_Mdp(`+_616X9(3=u&3PXlSYA;_XI*RlybTiK7e3>K6KJhV5-c1OKoq!lbC<B
z)-L(%Lm94Q%#ryy)C>Lm3GKh(R=7&Law)n`uCU`08Qsz>n_4ZNSF3gXql!+*lD$v6
z2$#N;jU=ITx3gTzxDj+(eBHqsJG-@Lqx6V8b~%2x*8fvi?MA$ZT}_`m{YWe)i$z{3
zf#?9go-TPB?6z{MWzUjE84Gf;b=yV}XH7iE*>mAK+!vy+@10hy+I1V^v&d0iyA%FB
z6ZFtb#oWf$-XR;V4)1Z2Q-e7&9*NAAF&zQifd$cmOV~UO0g8<mb@KpCpF1ulqCt21
zZDy@@%G+!A)9rF0->$I1i2IhRxJ&F|JF<N-n5Er#Eh{Zj9!xb>#Tbgz$H$ZxzD0>p
zxNc{PXvG9Tg1JeP4z`J&Z#g`<tbl9<U11BHDrPyRuq$2-YnMKiE^-CG^EnG|2-4@M
z$3g61i%{MI`M$nnn&+^G0u&CfPQ~-!A0%OVM9RP6;T>t2<?~G$qgV1^x3T8lQvSxg
z=!`I3l=Ww$6da4bztZJct1Xj-^sZ*zt_NijGZwh7(Cg(Ym#DF3@HL*2$)}`ZBE8yY
z3oniiqSF!zK**0BC!*)DYpMe0OH{Cbfqxf)xijre^o5f(20$>x<HdH;H}?-%1vu;a
z^me9wm*pABW0xbDfa=d1<>L|H%4<EJ{fW@r7m6lVYjwHA7h!ZXS}h48d8akNxj<s)
zHLnM=2nMRhV>0c_(3skDXU~@MnK7U&`qt^qu~|ps40WQZ2jZs4eWmW^HZeKF#nsKl
zz!!<bn29**!<OEPjB&kLUBOV#aDRCyXQOug1q&<mR9C!nnWa+Rf=t|3Z-#I_Wa>(q
zBw>1eNX3c`6?BNrY3q#<n<0e6BGdJG%5_D?Rp)k!)j4@yHgiZOSA3>j!JZI1X15fM
z^V}cTou@)Ymbu1+@^sG{XtBsATFO5ac{7h{%be3E!#`u}l9AIa>(kh#<^feW5?y;z
zD&$4a)f&rjt(0UHV?G>4*(k;uyQ~kZP8Y5KQCKN0xhRX&*~}AAUa^M@2dnI8+~S#&
zN!*Av`Q7OP2iYu7R4u~p?h@_U9B&#hyGe${jjb5ZN*QNppE%fkf9$(U(#tOQp(-ya
zsQF4xJ`9c)>Da_5o7QBjh<ghs5Ie1;_;T$CC(iqe$&WAYmb*3h#t7ayX}p4#_qPo6
znwZ^FA;WMbFJziOCCae*L56MKDpCvw@Grw_t=PW$9DEp-)gT%Nz46g)#P?t4G8y1H
z?YsWLb>>e{MyKAN@_Wf5pKbUSP83fn)1DE+KDkaX5_xo|3ihEbB^nAh!tQIy5PsUR
z?z&#J*gBx2f{d(<NhqY{@X|`T9@&T4K7d{ir%U*AG^klWvk0Uy-&pVo33OYoI{<I6
zoBD8ceg#G1gejwpiR*4#aYE&A&df^t6ZZ#uf{l$=xe;Y;bSlC8F*aXopp%D9vn-OO
z#)_0klx1XgPDNqJ0b6V1^inxc72W4t7VtV0dl4>nq8TGa>)b<qUEX<KK_2_+%Y~A*
zCf}R<A9V+_Ft&GAPTvY~-6989r1xFjlVqA}_%KIZML`3C^c=AFA>rSmSt?%ix6~}_
zHfG8hSXT-JjgR0@hX1Sef&J9D@C)k~MLs-8E3|$pg8FD5Kb0ZuKlBhAJ)hpQUZZj~
zXWH@xcNF$Qb5`aF@l?J`bse|hPwdw3K7hu&#Wpu<fRJg>{<V3xiZ2#8d#FO}mXG_I
zYlKz%al6@<RvU>_S8OD)@ID+jmX@ia9fN^bf^Gouv9KQ?T=x@ER34NB7%D&Hr!{W2
z_olB^EDqJy$dma!6W~MAeyelO*&XJ87@V2!e0i9XSu2^|_3+T%6E%r?U5TXAUmPTJ
z)NjM7_F^|q{KgMC?E%Q96Yi!6KA9ZN%rXf`mQpp$>FEnPYO&L4F*#3De)AhAyV@Y=
zz)!C*iROM+v-tYnKja9?0r%j&d*6f<_@2%nKq=T1Sv`L1h@XS*f-s}_zi2){ti`KB
zWR-qT&x9BMhynax5bf`Ijxd+K|DR?5hW~%>4)bE}257B~#;2V>rTRFC5d8QR)A$!9
zT{{CVZl#<AZlJ)PdxJ&C3EXx54R!sT1_B`5oF{{&7XWdbGhi(pib0D4|LD7)BTfVC
zTU_tfvj8TJdPp4D)I+;&uKYve&s7WBGZ})In7GRPhy{|_-36nSXwQ7-1&B1ryl9QP
zShuy58`urH``AS(EE_$RukZTv6FZQ{xBw7bbA9wJ_!+s$xe8hZW&;q<AVq}@v<Z64
zd@zGl;(D+5qfPh>l|Tk4p1qWP=jSL5cdg%Y@{<_wq!S2EV28#YFp=T%y^G+zdS<&S
z0NE8@vS~Kb(|g4&YVakebnJvLXz@RfaI;{7`v0<wU2u>I;&T3^F6ihSj`pqU$y8bL
zUfKWR8|Qxk;p|)!#Pq0E&@2O3{!YN%D)gNER&Vimh%NfRR=JZ^1z^2Ar*CWhbV`mQ
z7vTtfGWXk;<BGqXPbTXH&A>(sy&97PWGV04`%sMVW4O^2yiQ<D5P#MXWBTuf#N^dy
zPQbVquF1?ekh1>XRpu8}yYf1W|76Sa!(j7;jtiCjy@N%T=a}@pxc!M2uZ&BKaI@ch
zQvH>F!|C?QsY73nl>G<#2LJV6*#{-n^m`I{^3sm6BPt+8$coVY)qoX|QZsVl59ZxO
z1Au!m;<$p^4x}K{i+=h&?=!eH!^<zG-1=kOQ)Ue11;q?RVSWWgTyl=801|&iI({8{
zu$}L<!Lap)7yM_QC`SXHTvu^58Ug#O+8_7z%pdEk1rZ`1HAnFfKr@I1aOBDua-;xC
z*e}X*|FO3nz>ekdzo!-ju=3krHo=su7v{lkpLWrl0AbF5WxyK9BF7o&Se4%qCKN7&
z89n+HHu}Az$O-1cDleus0;&H1W{cK2dDI(F`BLWG;=fQ6*da-9OhXQ#G(c$RSEQ!3
zONB|Wil!DI`1iERi_C=$#C6;Qp&mUj+ijr(;nx6_pLvh?favAF<BkEo);@1d19DQ%
zl#`_~`%_GUPn}Eu85;V<J;8Obu=g%sCN8qDvkHUR{^AN%O#)PoMC?`ga~!ZP5Z0f@
z^q5B%%r@uaj^|_&{4<vI@3fr$U|5IDgT-SYZpNJLa9G`G=I+clJmqI<hJVQ8R0H3T
zyz4Fu7Lf~Pdt6a!!VU;ldES)yn{iBKqzHyh@Lu+Lv4iD0n2l|3z4c4x?s(r*iTYz4
zdn)+GvHVglkd<Z#W-ISE6MG6MIrV8o|2IRx-2W?ffG7VecK<7O|KDr7wQ*{R`aO*`
z(fZ1626W5(gCzPLNb`<TT=^sT`{WSi2PZT94EdyD<)s#J+3`;9*&@4vb2#}*=P4s7
z!75%VgHr8N-d$H&wd^~+u4ag;ur4y!pFK^${E#w8b=;3Cc1=<sOd0pCw~#u?ql<o%
z0+FDz>ca}HDI1Xc>N%jUsa1&L>34*YH1{^SFY2~OIW{8sNdAbGE93H$6R#bi15zZL
z2RtZ(tMly~?vC@P^1>&EKzI3nZyENHVxZgtINSqRIfW#VM(nh6p7HnnPb|4UbHd>B
zqlyAXwl?q_mIb?G%B!6b2RCvCybzP9Sd&_by_jlI*|Xh8osA#blCBZAR;Lf*q%kH_
z$3{l{a{X6K;`33i9s8z9$tW=`BS#on6aT7Sy+^FoIe)lWz&2f5fm%uVp_<g_W0#v&
zv{0NV{$!uyt(dFL&k9f6y;pV8fp^9E%Yx_WEml^<A*Guhe^@@qO`!Z+w9VUq(ZnXg
z$$_Ny@AP$A%IP9u)kIN?OMej_)JVPCjW};8+fBD~je-l9QbA|M0;*e&*z#jSqj}tT
zsEe$uB@?zUsi>rT67Dab%^b#tRgB9i+C~K~mqb`f^=}d9l}Nd+N}`7E6Pzf7n%(6}
z)o)!)owW-CYvTooeQeJoL}Wfj75h65oDHyzl_~E_oSux)?mZ*Qe~}HbBuXl#HWAEY
z2C-<vQeVYrx9Wb{GKRX?4>cf|IeDQiXV-gr$o4K92kFH_Eup;}ILk-lK8snmvN!fp
zv!Acnt}TxusJ-NFiK=bA<o3If5?WTV_byrd=bC$>d;#3n_1_-4NdWU*O|WMd0mp0Z
zB;nyx1Km}T+vV>JaZF1Wu{lARQ79pYSZn(&Kb0R_i)`;z(%wq1R}z>d#kWkZvFsvP
zoL~MJTUP$&+@{8MiTUxsG`nWnGX-8V<%~gk^h$dFQy)5>=VTx<d(%5@=1%``khBzE
z`qErv?xS+<z^oF0X=nv36K#uT$WEluRdHzsVRHHFe5fNF4JSDn$ly#<av~h%zajpb
z$$K~bAU?-;u07abbW%i`_QG1aq|3^*(B>t#u->Ph2D~Tiq0KRPM722fS$Ny&|19<o
zcj@^trXzUNqUfvEwe_2z8D&1ASIOD^RE(uqEa<MIWQ8*+!$!<VlEm#E{avwA=S#Bo
z*&}nVQ~_<_3nQW&7rRrlhhIc4FJp=?pBygsEqB6}h;H0BUh#*Hc5#`7*pOvP`$JqB
zU&uDVxB1G3b?gV6AG!rcOiQ+D#XSy^x7gXQn;Pjh9_O>AKW&triVwsytdZu^v8KJ>
z1Y*mcXk9li)g5(QQZh9C@xDHL?adf{X}F!&h0osWk+qdRNRY`mAuji?iwkr_2GckZ
zYhDA^)Lqv1vUxafeQ2*A<3Vw0Ft+nb1#BjJcHkq2WtC)|6b)poQstn0Vy5L;5gTp^
zs8`sxy$fr>-T~AFr?#{1w3^X_Jrx2})Pon?STRarHQmWO_w3f&hqmQ@&o%<Wr^Fn)
zxm)_)baG6YTP=xn7Q9YUYVwyDUFP!2snOVtvaCo~uiiTj%^T1QC~(K1ha(B8YkbaO
z(`%R8i%Vm_Kw;K~_dgAs3m8ntuNN^KOsAe5Lni9S@xY09_tH*}%^s5bnNam>=ajDl
z$Cmr49Qh{59cI7jZZ)nwqo14xJ=VFN)JKM+?WWQ(1k8ZP-G$25oG^MQLx}<h@NvQ3
zo6c=KSI-(C2j21)QJVQgv%AAv^%`g6U+3ok&3C>`5K44G={w!UCuNRS&IW47xS#2b
zD7@Ev^0mntrrLS+<B;_I$>=&3!kKUZJLq{C5WNgZ5Mk@B_(soz5MHnlJQ<|Y4f2DN
z_VE_D%ug$R9@<7prP1b3tHtlXN%+%leh5@ec!O?gJ1|Y(C|}uwnBK_tS}_W|%c!f(
zNA*`VwPC$U*2qLs2n^r-TwAS#?;c~hVO_?t!zSF#uX2&_X<hzHTfbzl#nzY&4we$R
z-&vLtpuXM|iGEbh!G+&h9~W~^v+*G$R~nshxiS*B&LN!#`z-A-sk1Tt)e<qS<cwdD
zuChIN!B;S&+SzTqiIig?Z&;d+p9i_}4>k_a3yz@WFJ{=DyL;etUU*ze@|i!hJJ(a7
z-LGf3LxCk6u3Wequ@#vGfN*~JccI>{z*w=qk6#OGo{BX7c?%V{C1O5dT#CiYTKOGT
zDAq*d0(rDbaW!TbpLvtKZVs!;O0>vL@J;hyC#GX&tGp(CNt=`^aul)C02-RO-l~uC
z?$G3+tPyQqyFiP!#W!*{s`$@?6dxWfF9!;A;%ZG#r{g{0z%--$Ghw9>XXV;yj!nW%
zMVQ$W#r#Q#m+*Rv{Fn88p88Gc<QM^XKlV+|U5+w|M~xYoQ+X?W{m$8h43KQ6D`n=v
z%vSoxQA6s0Jg*RwHw(bS`NV$H!HVbNlXo6BYZ5SH_EVE2%OZZno1jngXIluI62(w*
z2COpDAHRSzOH^Tq8W-MAT93wYgifgfZ}Qd1wLe&RGYzm%{{U|y@FuN*I}@b)J?=SE
zC_Y=K!2THbR0FsMX5Y*}HgYW!pg_2Z9}@u%hRz)~{(p%C-Xznj5($ma0^Xz~FqmtE
zTs3b3=R<WQ?<??E{`ER*nSn!byBy%7TQhU5<YjsGF_U5cDfjrT7<HLmRZN{)I`Afs
zA+2^1(%S->*JWnVKI$_%WdZFzyd{s4D+p`U3leGMY)aATme&3K6YI}<PvJ*3#DXUg
z&l*?dGvD&U+XB`YvHPOB<*jmiGx5s>c$iFZNlMdx>9K^11|IiyQHYu=#RZN9JgCxb
zrNtL1q`8S_%iY@O?gdLFmzH8NZTJ3!1PQXt+hCre+^+uqDw=;WidLpD{jGd{?14;r
zx2*J@^wwpacXZy8cApB6C}X^#Uep*mvV(;XE$MdnkEOq7f;}d;A_D-6idbrPBZlQ(
z6ilPcTq?#Fi_V0dv~G-1YGYanN@p4OUE4p|cqe{9y>{#zPo*VA(VG@aWC#_LEQB#w
zylwCJmjfl`L!;u4fN2uX@EaVjJ3uJw=vr1u=jkKQZm$OLe)M?0;ldUM`HOo|Vc>_i
zgH9$galTf#s9;+QXM}xUv0b-eQ0!-pGt5uom<JY1h_AU;d+aoCOASXm&OJ$NqO$rs
zI;z+O5%_v6Mhsh==e$JdP7I&@Qgb|5ULQ%_LT=1py$!qHNWp@9buFS!=e6JjbALv$
z{&AAAK>NS<S)I5BGE&b*zpkdE&u<ge6)Z%SJS;63X&lAsO1eG%PX8Qz(eI44#dPhN
zDQ`o<jx1m+IBC!G^Ok$$K`j)z8_+WSJ@xp45vA8oH5fKToxDsO3*oXulAQx0JAbwL
z2h#xMIUwR;Z<3uEq3B!KC2ZM<I7zXN%6WVRAEv?m2r{)Lw;0R#K^7pDs1IxIk~j1=
zm}K1;ZdalWqW5GEx$ifx0;yQLE0w+;HLv`f6XzaMu{qO{170}}80e<N+?yGgW%O2v
zpC5<qI7Z=mWHXX{50ju&-=;t}8!vI}y+i;RLD`Off0Rz|J!vRK{$PJZL+cJ8OUu16
z@$ur{%QJMBneWCty_sEFo=9rgjHns<;D?iWdvWI|c~PO=Y3I%Cy^;db6PLf+E#KTY
zPy3Ln{qbA9y8LYGhKwoN+n<I@w6vMzdwh_GTt%Tfc%oxOb=%;kh%Dkosk{yCmTjY|
z21ehCG>vMtt6w16Byn}ZII7H6Wu@3}!;L_nnKn}$Nj@5!MgF@YYfM+lsjD5AH5*h}
z<T~5%MZl6pe#`COfEfU8nM#LyYG{1y&~qnb0xKe3ZoT^B9<vbYUT*D2DJ>2I6%S%7
zy)&2WIvdX+5+<;?!>^Cbyd^s4-!i!NJ?NxAV^~0`h;VdZROKY!M(q?&mP!+Zgby&B
zmd2qN7Ox6_=K=`tD<53tFw-fJcFp#5_}V%aphRX<M&%ATf=7)%uA#<y*F4*cfp#+u
z&lEFLO$w^q?<-wG_K>X%=goQ{LgQkYyoOzbm3VC^IeT^aL5?!h=>nD%Gm8~|bRQAd
zEW5O&?wh|5juCWO<ViKpc=LJzBpm=>;7v*}J!Q7Ze<t|pL0SvCs&FfP*=yUs_kAzZ
zx}$I1&&kZaCE#7S0Kb}E8M%7@X+_njKM#fRb{@sFtR0S!8@@+|6%3z0=>nms74g}r
zs&TnG&l}*ARg|IUlNJ~!32yY5cRnr6E|8O6O9M7{0{`mvRVueloJJH~s&6Uzq4zqb
zLDj!2PGEkk6Q$E$deB*la@5KAzG@g(evzw_6TP8ydBA2HZ)13&K!#Lh-5WrCVAyEs
z<SyNopX9RiS6=u`RZxli9}4DIcDI}bNPvAQ9!;#)vJQ){eg8^Tqi0-R(Tm#@;x}cO
zH7hdkJu0#|(;QQ#gPG<v7ki{m%az%l63K3R7Z54mP7AkXntLYG@!8qXR%ruk54IIg
zcrIMbyenz_yRh$apY06&;A|1t{wDiaoRfx+SbnJCHOClrxEoQAfmMQr!tv&6NNV2Y
zXQj!vVXEPLx?e&phbvFoZu_iQzSH`9sgb@#TIo$D?&;ndSm||b=O1S&FHg`OZ~b!J
zfAjvvdD2$bGJPJhoD){ntd;18oh~4CFkK&KNN!LANun4LVCqb{i+0}bYzUO<XG3hg
zv1AM1EAv-L-eUWWK+Im63HhN@OcHh^XMvtJTejnh=<IcY1M2<>jLAkmTaUtGnF=R}
zyOfg&GO@PqvVDTJ4f?`#q@glyW8zXlIMc!o;}J`eTQY_}1GX(_z){xs!0bw0$cq!b
zQ#Lr$zu%uV8;rF}trh>a?UNGt$6YzXEYql*cLV9*LbsX6W-|Sg$mg=$QGfC0y1pM7
zubmTRs#^Oc<)Kz9U&BKC<ZPq-*W;*LH8Xb3rrehJNURO?oQ$g_I$+2{roxl7Zj2yL
zdhw|((y+GX)#<GgC#t?u{4L@BQN_O3>Z+antCn(Hu#0*`k1vUP5GGw0;g|Q*xDXY@
zVLq8refa%&{^{I^Sk%Ir%uw#i+y0vEGLa+PI#@gV$z9n3dG40-<fVG_d-=^TLpdhd
zBg1E@t^<pVhDwdiwj|@F7aX)rLG1yJa^a38%b3SV5JaoOyhfFgMQPXYQ<=s-4pSco
zxC+gCS4|AF(=n5HhxHs+BvnC-p8sj!@XU+Y_D2K69&c<Cv6JjN9$D~SHH=?6$wWy9
z1aeogc1}0p-1c&A7~m4Q^H%<w4gtJ|Qvh@qWexr!b{f>A%paz#@t*7}n=p{E(MO^%
z`r>P@f8%x&eI8DV(hviMy-weg?F#KphP7Z~@?A4;9uW6}M6}NC(4K1}*ygv6=HIcy
zIr5`&JQwT~ieT7;yHX8ZkH+<uND)xyYoH*p=dr-@=2!Scri(px2kdOzI**|^JrCrL
zJQijpUeSgRvC;kFQ44A4!>D(`AoajuOJ673)(bIHkUi|cmzY_r3q1w5@&b;UMz*-V
zr4$|x4+g<*ONb_eY6{9AGpj*9$>2EwkarPNj2T;po|>w2;zz)o@NEfIH*#6z8O5Y-
z+>)l4*Q(Tc%VCVF!+4K&&gQ28%69Nl9JS6r7an2YoLzBz`awzcBi-85gp7Vw@fZ54
z$$^pk2GF1)V<}gR&ifyGaVJ1}8-sxzbx{=DxmrYiy!R86)aD4<{1FK+WVA|5TE*M4
z+?9A@OLh!Kaxyhrz!+uc)VHV|q5w&`;xu3D!_zZ2T9<vdDJ;iFYbcSWs(J@ABY8(o
ziCYNAbm3M4sg(#S`Qn8&VQJ*2x#yT}jQQDk%oGNFcv@RDn*pBRTyHTTYr|vhPE3B8
zebDngJNzIA?U@9xbFRfImZQDDZ|=d=sza+xb`!=93jmAtYUimn*kzBeWo|rORiF9I
zwvw}{6(pyTAEXtR(xpka!xX!P$XklupF&<?O<+cd>lGZJI``xJfh@#AdXG53VO|?(
ze?+$KE~|zj&3~K;>viWt7hDu=EcvQ)D&?04(mJa+;G}u!ag$Z?p{6`io#Q}_zx4d;
zm*Sl;o1t2PqdiX-&Kaeh5j{kyrwv@|JN6rO2dwl4mTC*(lr4xiuyeCZgS-mO1(zH0
zAMIf~osK38jsruGpk4wh14;!S3|B^wvP>>d7bm05-D=O0GPTYZOTR-%CzAb>x`a&Y
zW;*3uhOZS*e)<yrpsczLt+wVC8%updQG0#j9k%UCs~${PDs3so#nQDVxwf`mfrpTM
z8|v^y_}I5a_3X>z#prED0ZQCwtnf1NMXjx&{}_o1Tk4K;Z!qe3sY>nCf&1A7hx?lL
z`BE|z7Dg{4?gVQ9f9-*Uw1&Gl)l}_3&mk|SJ59ONST=kl;LansXZ$)&IDISL)#QeD
zN=B7dpP_%Ho4;O2B551Oj%k(c-1MBiqQ>+?@16<g8_QN#lzlLo$X_>b^@gQkR|1Kr
zoYvZsK=qBSv~7)YqFcP*FL{LzF(m`bb`B}aVbP)6qgI@X`rUmAo~xB5>*1aGot^h0
z!bP?&Iq6eWCk8}3n3X8My`4E`#JKSt4(NfM?5w;LHlh^UL(gWiE6ln}KV;Of&~De>
zK!%LJb_!+r!;D=JJ}W21U3BDIP6l$k!(nmBa(lH4+WMmC?YrP0!pNKZ8xK}wN-NK1
zixy88n}>R{xz}cu@3PFjNTOipCH;vN!dLe7nUj>%?qQ%B-vT8Ha&0NLpq%R2tKSi6
zX5jO=F>rX|_)MJlfgs~?O3L5L%woCP=GOY(bTbEC0fTPy9$E*w>=cMzaU4H;9>PQ+
zOX^Gc{}84_Ob<x6?z$~NMcx2UDW!A7%$j?SMc$XZQPpK#VV@*{nE0A<4SmK$ThjWo
zv@bni|H=J3SuY(fc`%ocn8}+O{0HXz9kv6a`freC3$4s$Kw*MT5O*uDTjS6o&j4@S
z2qOi!k0wYyZW-gMj?wIUF8u=6cz@kotjt=TEUrc{ppzLTqegdFGYd7Ql0uU9lT4bH
zTn!pFZh#``LKahz9DL|ly>aT%a;3-8zt+yqEL!^6@Bo={<g-xT^gv-1aL8(B2G`Wc
zGPJl$jYci8CSz0mM_0@oo$QO6=I{X4&Bu+I$D}SO`jAqk^SYHq*B&=@l3S-0+vU0@
z@3;G{y`ZkrBV^?)AM9}m?UM2*8^yi@g(3~5?~*QyqLmw?Jw#r=?)y5j8@vF1OjPb;
zMr-*)Onppv0dS7SS6}rUHyDpE_xT7{{OG2$f*uYZ8Ddk=ny@BHBsh<%#ccR`S%b2z
zhey9?^u`+7EFJu0dU5t3VDUFdmN35g!P!ryA<h5q6>5~pHmUh5Hq+N8_<aj{QQ~<m
z&>es6W3z)qck(+g<jrh36-TUkJlz;zIwxsL;;k>ckIbO+c0%`*JW`*bY7F4r?a}un
za=bfolDs@4+hV$EAi00S!;4?D{8;#Ikn;CweXyF#EI_+mC#f!pe}troP>7b-5{EUC
zbcgqueslOcaigcS$gg)knXc5x(^SVH?((>l3Bkj8VXXifzKk^IzrgW5&DSf<h+HCd
z6z!*e-7LxBG-n5k@Y}o78oBu)cgmGc`y@}CWJ;*=C#C@OMSnd7aFrP4^=dj3BB!7C
zC{}3lb)5TFgP^;P_q>y8_ZTwq8-3hc=<<3giP#IG1rbu&q)s)oLeJEj+<Wo|XnD5`
zui58j`JW}`IF%rRow}>)d#ZZ+`P^%M@KA0z@qavVt1HiZ52CwHl)q@aGN29a%`_x>
zlM1OOaF@}o?U``rVqpH8T3HoO%&U^5bFvkd>S5A@jGdw8D>y~V?!{S5Z(uBg{0uGI
zC!Q0S@9Ce4%kC{`V}s2)1~k%v3k>T@qL=;$a5i3hHEM1I64GQ!bzC3eYDm+cES4KB
z11T%MRmCE=4Dv5qDAw`4e%THT*ECm6fn=)xIu%iW$B>e56N!&e$}QY@+c&QA5rBw~
zl5E0a^Pk<4DTFKKeyrqJHwk&g?WmW2hu)Je;9JJ<p3vqcxMf`i**SCvZ#0JyyfkAY
z#--~Ud<J-N-Qaqip|;NU0H1FP%cYI=dzGzWW;<*c;YC+6+Yas$F>EPfl(nvlP1x4;
z?$fT=-Y(pXT5{-XZ;lbN<z04u)IXKmaHH#aT6heiy>X0z%5E=Lr29<vj<mhbHqS=1
z8F!%|MmPX3xtF!(!{rT)Tc*UaR}a-Yjg`I29F_$z2PT=Gdm~VGqXB*blvYl)3XCme
zStQ*LmOF2ht+2n&&qh2{d4%IH<5zrHXe%P=$JX=Bj`yKI6~;?umc^aa>y|)&ePEf~
zMW{T#f(PJbPoq+RqnB7^3tf&O&XXm4)dW>SDwCRc;9iBEW=Ux!yC66wL~qMi(bvRB
z>@VU&IPP7t2gXrrs(t*{3R39+1%wLnits%qudz_tm)Ld>GSQPfYwS`W^QEZ&{8ACn
z<~V>Pay)l-Mihv3mwhrJJI++*a*C%e&qxjQYz1%3EM}$?CKDlVoqe{?`PvfYm(8nY
z`xcGa9zcefu2m1G^3G&Tb^pSULJ_uIgz1dVtUJZ}Qx<M`wfa}hy02g{VMWY{K}x&n
zKvhh`#k^NKE0vRF(B&agRlE1S*SypUksW3|4w-mtM>8`-Gyr^=9HmhoU(Xqw^e9TW
zD0zcrgV8=P141T*01?Ru-*atcq}g~A>-4ROO{%R|vCBrlG>hKxrEB@=9B`DDvpsfG
zWA$uJ$UF1M*}F{S`oysP)m{g!$_7lp<*uZSDOs2xc0<DSeO3i!O|>`AHn?J$>L6Ys
zKf=hANwJt>1m+KO@I1BenM#-)KPi2V8RX)R-<=qj@US=m5w7s@`ytfw%$4!1W#C23
zln=KRFymA|+8cX+a7TI}GjedY+Wv6h&3p)CdpdKU19PtD)6R{oo+LSI!Z{ulK@--)
zVjcE~9@{fxKn&hqOhx|^gHu2Zjt?tK05K3?4SIJNCp97RB4;pXYs+Sq5GDr4EBMk2
z5G$cxy&41M_wg-V2)F}w^~B;(@;=`a$0(V1r;((CM!@!@0UNZ`oxZwqz<0BtaVAP>
z`w`L{0CMkw1mRCBe1>p{W{z|UGqwXf5{!EFvv#RX->~~-%Zsww1a*ZFliqDSHB!j!
zDMtd=o6JjdaTgmY>4pLJ_2Eag6X)f(%72hj5PQ6wl=7HfyST&Scg(<<`Ss~f!X=Bl
z2rvA!k~}_gG;C(pBh6tk6wh?CaXMbD`-JoqP4}pmegfRPEDVH#^!Y9?A6t24+veX;
zCNtq|_@$y2WM=CMFh+EVZ>XP8hpC05Ah;lZA<y?7l@}2R0QIGoZ1a%hpexX`^lJqL
z3XjKvbHc<XqyFOk>iC4?CU-Ra0{o(<IxX@8U*<|~*ez(R7zo#p&xg2Rmx=2E%y7*w
zPj=nc@CH=ZAOuZU%9~sVAviDYxF5F~dppHp=C|d5MeCKAq7m8yaHP`d3E47WkI`s7
zw!K{uIYoL=v2N+#JdEJIhbW_UmaNuSM=6Bj?mekruzzhc6yKkEQoYSVyMS-)*?)!b
z!y1?cziQsuM?mh9_<0j%%A)RBPE9Mr=?oLneYYcCmL`jqNMhDffmsIVrlUUf%**-n
zSCA`D?9^scsuvJV^7)A;v=NqKvwy<;hofR14J^n!(eX^Ey#QFo7RVSj<k1&i{gK|i
z+rnsi-0lVhdW+#j4D6krw(ug}F?6)F-Y+7Z*C27F=uIyy>*32Pavg_bwuQZofaQ;T
zoFuVOlP%cE(r)^Iu4)pdd>v`7rU*H7nS(ZEkm2xJ9*W=&3V?B=1t@7J)$<E5iKCTi
zzMe5I969;SK&Wk%PZ6#X2CL4@TL9?K0S=nt_M|&AIF0!9{LBGDv25&@{5DT{LS|+_
zi%6btUTg5%Rnzyl9r_Ns6(M4TyYEnPe!v4UQ$>rcbaG)6O_ua|-tU%%pH=MMx&r`J
zi>X-n+V~#Ojiz(^ZR-j-U{>Vno<zmZdRywQ!yho%^2S~TEh{g|bG1eu`b2qCQJaKI
zOu5n(P<bwzm-_wOv`ksKC#`iO*cLQX0ag|=0T{)jH^c7%FdWMow3o&L+THAuhu-U2
ze+YSXCA)xfsNSqAjzxy;_Hoc<?EiH3)?ra@U)b=G4haQBX)#b*kxoTLKtQ@%Is}2C
z!Ld*guuy44x^d{CQ;DHr7+R!<mZ1c`{SZga`#XN`xnBS9Vz`(m_F8+zz1F_hT)uz5
z<(a<VTmVkZ3%T8yeCs6pP#<|)(+UC@p~}N7(X_Vf#m=_Q-j_mAg<FsMW^+_n#j#(=
znic5zW)n3ibuZUwPNzT+8f+#`-(mhiDXw~(HN76oGB*<*oXW2fYU1}~AgXyj7Xn0P
zt?ZEIv=TNHBGTE7Updk$luv)|j#B^YBA@6UJlR33p-GdP@<vJ1GRo5RJ{>H4I(1s4
zoL$wf#dm%ALjX7fb~8;$%E$Q0ho0#B8O=i?LmD@ddu*DodZwyf2G`RJ3ePl{3De6|
z2^UJ6%HX&yQF$|;G0al^O%ooHFLS$!*4{{TRY8Ag^te@;+uW}T)KHKxP{}KUAm#s_
zII~+eRbox7;!WLtAwO&1LS&cmV?%^%bU!ZUq3glOE`Lu~CI1~4|6>EZQZ-3Z5Psq)
zIRt7h7aA|TQd{`)y@8H5y(3q}0TVm*Lc6J`%4i!mc9fz$%d}wZDGzR&YQFDnjHJzn
z7>}Ze@RX^vEtT2%)K%K>=01^@+^ez!OKyfHLdmtUS3oa67a#!qVl6K|TielZc@jQ5
z%W@AHBtUy-;LE~^K53`&&3xk&q4G_if@2RKp3!Whu#;@UG51d--UX*>%KV|~Z&Sl1
z-nEYNbaE%eFZWgOwyIWs9x|F+`l7F%35vpwkDk|Kq%2cY(?l~9$;W6JY*TX*!32+m
zkn(M7va>6aPYf{EWlu+^cK<M(`l<jVLSN?hiCjiANO)>SeolgTM`CjB>XE29?T#Qp
zW^K*D>A*StP#3ZCM9R(8tduXQbKn#OVN$GSgo?FN<xk`f%M)lDS@Jw17q6mbep+;@
zSoV(h)_@y02%td6GgehgYOItUDy~(0u@j)IS3o6di(+{v%E8z&v`d=$UhTndNbKZ{
z2&KM9$GN;c+`C^_-~dUJXxn!kkaBUtU*d@?ozcAUzWcL!UKlvbB^F<GUg^2Rm6X@|
zXJ(^MtQKBOz&sv?(rNf@ONUuOy8%mL=0ta=fD13%7+m~1IyE(;QZC;Hg-)Fg#}sd>
zxjb*_;-1+TVsW!1^ypBZlDpG0zsMvqlqOWpt&~q`FI_b0v?L=7$W4K#u6Am)f%8W*
zX(<ka1-)J^v$Iv=4jP}><R_TB{NCvkXWq>z&q{l7O-Z=R3l_FcAV!oqljMf6G?ZxM
zWYz6eTJLmB<FU(iW#qr`QNtw3&}1xi`)6cFhr{G2zYV1bmE;%7>T-E?{rkziq5t=W
z+9JXApLvWbrEN;SU2|oaMJ+~KYk}aWp$x^B^l_t?wI!1%E=~t$&0Lex6gL9Xz#Z%L
zcD9F*KPBz_>|NYFGs2RlvX^fA@2I0<z9-f|$9TgrX$n;UyAbUQ+7!}5*ftwDB~RRf
zkMu<hDre+sn4~t}2B{ksm6X4;y2X1z!{Rf!OhE*NgJ9=VSJvZ#^P&#+aY1@y`h7^Q
z+TuD-aV3r&Ls4*25WYiyD~DF!*ei(V_V=yIJz|>&GRh!xzRy{=2<py)c&7+q(<Pie
zmoZ;YZw1*b;}d#d#;$xE?I<_uQPoLoxKvK21UAR{C$<<;%}|r|0dTIRz0o?Lk83o`
zm66L}2ep#Pn96RaTbjVUigoUVadFjGcy~CHRna!saOe?grIx)7#CW=)T!z%Cli1WN
ztxUR%EScNZtT_y3hZ)ehys`e2%%5Le_5XEw22rI?7!Wp#7s>oIlg2}Zj402@6sBbU
zd4){SO-$)bc4IKFSy{Q4=NpRYNRvEVykQ@|nb<8k>I*XPvx;9Ht<czG#0FUjMUEOU
zKvtVg0V;pF4o?b!@)zGLU6T)6T+8#%Dk^w)GLQcX+seNE>Brm<45>t)TRr0t=t#bO
z_9{z?p&1?8ExJv#nG3Y9^b=WLg`M`IJd+(5o}<Sv*mj;0ZPkVAd>$^<p-ND7?v?d(
zEkuVD<E4;Qxx%I*%H0$W3w%W_O6NWY=$%n!JC2XU<zXTcgQT*t@Mov80;dh{Moq)+
zYkIKmtmWuy^GJsaxwh)LggY{zHG&e|e&&`L@Q1psw-r1S=YSj5P{>;{Gq|q`>0fC`
zSGd2DJSD(<19VV}D>@)x_|1+!AOHQ%)P!*6N00Z(T5eQbb$1=St3UCfKw*#-92AJB
zL#sN!U12mF6vK^gRU-D8R{o(GuXRl61wCAIO~5LWbxRX#&-a>Sgc`6~Ssa*#gztLZ
z)<%(nwv~q*C8#)$X}lZ-j?$KZ%+4ffz_h#^w|QkWzL^QiQXzAyzcJb#TYo3wBKA<l
zwl!;+s&kwM(`g3EL`uk%V0+>A7eU<bN0(`f=|N81{HlNt(J;;WtiWvl6?D{XLz@+z
z6_AB``Y+h;Au_Mj0R$BJ!WmRNuLsGPQebmOr`)&75Itqy&EpP{6LpH>9|!xEjQU>Z
zR@=TzJNEM=XEtQr4F%`H;SWm%=qZ@`ZL=wC1Z!Rdb59H9PHL4$3>2-Qm|~JzOF=-!
z`P64VlXixopzixymFF+E+vN$WF1|9mm|ymjLH}dT=9$PjgIzd{4F+oZW>(xqG$5+r
zxP!(QpZ%H2Y`m3UIALa|jtHVVrCQ@V)rMfngVKBU!I-hW4}EU;j3<PA3k$bvVa92j
z=?2g5Ck~j4cTY{?Apv@i$w(To4KlXI8Vx0>vVs~Xhz8ZOg`xPGH#6$oPMAHp5Ks1$
zT~go6KueukHM+fI*P?nB)iIyjVI<L9>esNYj_n^sx%C+5JA@em`=phBT{oh`Avgn5
z)S#zz&!#ryf}=oBbMBFOstX1!!oV@ST%8`17fN<{*oEof=iE@_I7X5r^X6P{<p!yc
z1M_@{Q?U!gvh^1n4ai^uqjG*IQ_DQ4K#_t|KPrCP@k$$BZ|KD1K1RWsY=pkd-s;26
z2-&R}$TGs}_|;eX;&=knZ@p%R0EXrPkshzIt_~Qe)`ef&9Hg4pohXjVxV?CpH@m|z
zlPY@M#CF(SCD0oS5|_g1BKfHEMSsmd>EhD?*?+p-Rwir|DU7k8;B%Q^`8mS2IhrMH
zz)E{ks%KGj#{0hUCff-9=F$XT#!kxCfIrt=gD$I3WQdivq}>Fwdo;85wn(7A)S%K7
z&lOM-;e6^<*?B!e#3V+uPo3l7t>ez=H7jr0*GLSqj<`<f?dD-Q;1~+MxcRpzpn`D2
z@Cx3~#Ad^r_yb<|AuNH8Pp>~*#b%CD-OV5X!;Ebx(#GVHC+gOpVxFB<y4Odc<e2Tu
zTxY?E-tt<`0!Y$*ajOl{IUfXH^lWznmu9-g^gDFlwzIyiNz$lv@Ah-h6jmy31Rv5|
z)b34rs{%^dtqHOY==y5xO9m_S8zV*R7hyic)Q}RNM2iR{y_G_cZp5itYQ9O6yWu}|
z7H@<Wt!o>R$7l^3tuB$SnnLC60*PjFK)qpgZ^Xd?3_K)=i8wor)^R{`UypTzVtT<y
z^?th)H|XgSt`+oE2c1u2zibd3%)W0jrjX~WD|X9@r95+aWS_YT-R4*}e_ld@;I<?3
zvJ-{4bT*f8YNYt~Yq}8h0PEmv@D8Y%7uuFWRj|RS#b5|O0;RdWUldpFBcB>|mR$tq
z+idbb#pnG{e10>i2Oww~B-u|5F@n2od<@z)_MtJE_TI$UJXYL}iKOMKlX)-b<rEHu
z4b5+2M}(;Q7HNVDg1m+GLTJ3uKgpA<Kw^A+zMY2GP2J2LRJR6qMtqS!n-vGvqvzL3
zpw1+%1lQ1`p!9_sno8`qCCd!&Mu;PEz1@p>!I@Q=jZM6cFL>btuaZ5oQr?E^`2r+4
zdFtK;F$|(okE=)u)!^Z84LpOEiDGRYuC+wOuJhq4aSPENK3xRSDFrcPUHuE542p1y
zjA*4fiCzd(1c6XIR6xOW*;+mNa5Nvn19fIh^XG@9X821Vo$G&P5=e8RFTU%oy$J#i
zv+YthG)cmlP0$Y%dytLR!A{!wGj9OOTJ@F~Tl#Kf>r*~fM><8V+)P!>je}kQq;h%V
z6L$^`u}V#p%?k%mvr<c{nU*+Q5$Y@`DD!W;>2vvEg>oyX9@Gw9nr%$~vO)Yk<Kt=o
zIPPvk*!@n=0&)C%#x;vWp*%#oD{Nar1p1V&#by)1{4~R1TE5ePNow`eIk1o-L!&&F
zek*Y)1mOU^&en^SfmPx8KKs%M9Bc-)&UkBbCbcTfFf-q~`Ipl2>~flvdzQSiJd9EO
z9hWXjK|-raa}L6;ZQo^>f<W1NSqO!2L=+bZPP#^2KG%TGAiw9<aX&DK+fpe_Fb~Qu
z%^R6!GGi7LAEu>3?bb?_D?P0pH|fc!b*p)ohVDPfsB%1H=6_Xw3Ltr`%T}vq?>i-!
z&;NW??&DO^9%6eGYL#s@xz7a&6?!#MPfFU{P;R|;%_c%2LJLhr?L0o1k289TmSj#M
zy8~*6sxC{i$?JQY&x$MktiTjHvPd>ZyG@tMZ1U(78z*Q4<7_q8;#2u@HHs#^JNhXh
zhKe^><}BG^FtBFvlgh63X@gK8=F#hXz~j<`xV%Va^xW$`Wa!lmz7w~Va-ZYlbn_(L
z*vqn9so_rKXZ7N`u(>Tan?|>eIeKUA@{Uo9gKy>tyzeIku>q)@jJ|%R2W-GI)?#P(
zdx#qlRG}TRR@Q!%po%9O(J5pn+={IW@brJ&7d^9kJf<cXm_F>wbMBx;6Yv`0=dgpP
zT0G<VU&<dpp!!9@{Srz*>}d3W4BVAn>|+?XUX3)PBx8Eso0-w{m;1VvuVG+E)f|Yp
z$>Z_c=j($<?Qd8V1L=sz?o+di-2(J!Ei#*z)6%!$9$e%5Nv2?eR&V-akF+~shjtt|
zQPr5ilm)`&XAoQ4+UdAQpapRKD0U#!%y6_@$7kuHYOlXokc=N$yv~aZzS6oA=NGrX
zs#rly;vk0JgqBXauKzUN6YwILt(3k$heXuP&UhWw{w=~|11e$xpl9`v2rrQMXM*!;
z;0k|Oe3oKDbsC574}7&8RHB~EaYV_+vK>YO|Mddkl;qKx7hv(Ohx|BX_g$r@nD7Ws
zAC?{MV`<Pl3zf~7!J?s0Db4}LAaFL2$m|A?N1WVJFe<`+=GT(exBc3aIH%(mzt4^O
z^@G1FGlzKPuIK&*y|u@r0YtcUY|H1TeRj7(W(eZ)LCC4*oFRFI55<r-{5#|{0=Ha>
ztG5H%8wUbScJO^BxM8Oh{(rJ6(3A3cm4IeVcEGO88y66OI)=V6CXnr}{lYCs-+b{~
z(shfSsVjKv{|M2fqeO*wL-i4;(Sd2S$&mfd1t`oN%dW3e5;!MhR`0yBbe6GGw9<Cg
z=46g<yg3Q&yTc*RLk9fJ-9GC-36J?7!kazAEE^m958)+W8ZQ3d@PWBSCLOr48`xXH
zNbd+91`DloMP4<y*TJXn-1$e$e>Lbx&y7Dy&+I=+59*dNBJ`95(Q-N@J(%^R4h%@I
zmy`|^WrTF>&h^ymp4zX>*{2kCvxL)I1?esPh61v{UwhL1SOIFB1BqSyiYvD`&?oF;
zGVJDmk|ETSnf0)Q1;`LG1ZDwGgDrL$IqtxH{D0fvi?3!}R0-JsVS~?x(D?55%spcP
zx12o1<ppR6MWG`cr%wjRK_=<P%NsH(hfL|QUTLu>GFK0q(hFamhcW-BBxJ;()5*`1
zfpG}@`bqy#=J$2G?IHo08e7}(49g^6Av2cwkcUjfnR~Y2&`$on5rYqJ#Jf9_w0MLU
z|FIE&h&Va?Z~Po=#ij)|2mAfA^Zx~*{Kq%}{&Pa?0Z)_(p~&>58aia^zoc&exB}2Y
z&^Mg2*OyGT`JbHp<7+^Ev@aq7X37pVBOiJMosiyLa3UjT?T31mmm%*{K=gXZoIZQH
z7#H$?^JmnpfnAMn?e9zwq~#@<)~p0kp1#wHC;$E~@eBt#*R4dF%!|8FYIkG*BL|1r
z(Ac3<-0Y5%haF<(Jy#O=!xOi1uc!6e!~}h;-~1d~&mP%xulGv=k8znqwIU!p>y+C0
zwd@27u!H>ECFl^Ri);hM=?4!Wic$o19;EcbuA_5<mVr0JH03i{oP9sblF;T>pw#-(
z2V@Y8tm9j%t>(v2Gs5=<`@1s@?Y1^n0Mgr)rd`l?uUs;6V7(kSM<~`XeP<I)1SKJ$
zfU|u#?@?uicUi~lI`?4k13hq>H&E^>f%FY7&RD`9mJ~+R3V#uDJyW|E>+wO4^qrTC
zLdJyz2kueQ>q_TNKCgJphJPe3x^&3-+WE8n$Irj@-0{}ZrW<S=EZ>^T?6R6ApN9^}
z#wh~{O&7l-0`CQ99+HWIoL?a?Q%rP{1K0Pire7Dpmgc<k@m12X<?mhI7e=d<hMrPj
z%eHX(mhD9;9X4Fx&?>Zd)B8(6Q%2Rrilay;+vQ9Kuw~LzQQ;ow;an!cCMB)Nbya}H
zXqebt3r@5;m!>XU7Z6bvG)juaVv)-zaTV{?xvsY=QYmgT-GCiXJ}fNEYp!S_*P`)6
z?-=4Ohe|h}-y(UyJmB)ugu(KQ-RCm_c~is0vd!AUNHGp$CkVsp_yVrRX>`tK7H{k&
zx4EXP9F7~~)?9dwAe3>@59u%>FaRafi8xene&TWT4TBKe%SNqfsSvZK5D>3?!W%@}
z^E9p|+pzDy<cq+-Jiw}WCOqoRo45`Y=>gJCsqHoMiTQw|r{Vw==F~_4E37I%MLkKZ
zvTr7<JQGlecCCj{j6sBD%`wu(5J}9<V5XEjyPoON)j^l&wsL>}-qPjqrm)Y6N)fhT
ztePWxQU*AUe;L@B4{jpOH!75XCOJ;eUa;c=&f9HrE-HcH6*e^KaTwFFu1gx{*$EUh
zQ~T?TGH?P?>~XSHI;4zfh%>HT*bk5g05UvH0C2^`%r~WPE;aH+?XXCC<&L8S86Sl&
z4Y&`b*!5(hxFr}X2=T>{(g2p6wTQ7;twD@L_sqD>c`j5EhrWLO32>0!R0_XBYM$l^
z9krx}jxjIjE}sXz2nb+IKUR7gHiKxlE7F2O!luKk{CvVhnuBTpg9k$hn5jeXYiUNt
zw(Py5sn2!o78azSP<CIQ-3KF|#o9BxWy252X=!Li31C(+zWIZGn+_A<nrT`o+F3@Z
z@>M|FiIqZjcA>(>orWI(@w4z4mFRfmu+H0Mxw#rEGd(?9t3z23``$VtDcVb+{MQ&?
z6n_iRBU%XPV*q<QE=Svj29HDptfCKz8{|piiXF#}$D9b_)oP4;_bin@^eWqN0uarm
zsN`HmoMe67$~ux>y@)ln0!tz{08u<@%mN07HbBCe6(Y6qHB6J9`}579_Iv)D`pgm@
zcff4Y0<BDgloqMYru7+<UAodBR_VcY0X(D=?h4pda17vFPUG<$1ffBa!GnsqcQnfn
z)j_wBd+J@4=_2y;C#-{Xi@M_Co8<>ult`{4DuU5?D<{tcMOLmo8~4jhci^Bw_nyne
zNqJA!6grPKu-H8+ee>qcmvcElH#Wnq`S3_I&I7yS4gkA7j$lK^Eee26CQGsWdE+_^
z)qNuXd&dB9y+*z@o;9VJ#pJnI4)mUa;b?V{O<i-QawZe`S<i9cSRAkgASf*abY&>b
z0cOLXbKv6JOp)(DKP~$9LO@z!mNe4&y{mP(#*)%(*T~(WNVGC$ef`@YbMLBy($}`1
z>37|x>(-uOqf^&0y&Zb9$Gd|Vg|*ffUMQN)KEath2SmI#V|<q6dwl_)qLXq+-sWKM
z0xuWW%(HVjV^(95TR%IBXY>1V0Uc_deeaiX#(Elv>VkyY<f$QnE1o9!Ssl1FcUuEb
z+mpaQRy|<ys9hRs%=6#zqhPp_+~EHA=H}CDNS2mo&Xnj6-$Ev2jPJOV`vm-4d0!FJ
zg654<o!cRv=X$ffLOjKTc+x3s+*49B<}SapeP<)?jVy<Q&Ii-!mDHkfX+O?hI|sw)
zU+W`Y=sVaWh_SBKA;r<}D%|LZrJSGZ#O>|I(FVx6FmFZ0_<ApbxqIPf2M}7ej`?oh
zqbyV5y6v9_n#BjDizeij3=b^L_t&G<s94+1<kx`5l<}KVtYaSA!fe6NG_W>ZfK{aH
zlPqy<eZ*D8T`~+3fC`L00&l(>k6<!`wx1qDKgkGjh~)=N<azgojLHH3NC?RsY@WO8
z!QCv0b5Ye=Nlqn`bF1;)%t~vptj_G`Ki}HLcQ)dm?!mW#%Q%kn+Gt1T;tJYrk`!lo
zU-NG4w8u<D1dYz8uj@%oe?Vvt_q`4A!SQ4%$V-!U&TbI4TNDsm_6-cIE#&zoDNLP$
z`?GqFKPQB(dMfD@8tL6{{FY}|HG~Hrz$%_&WD3@0CYiObD4di3)?J<_=Y{^o5w4J;
z5ZZ@M>)LKzG_CKFrIL(~jAWW1X!}g~z3s<U{k>+kWO-n7!aINH0`sE^`PMFGjq&S1
z&dS%*Fg>v8PI;p&Z9vqkuBN7T%c;Po)c9@ta#wz`Y~B!!JXJP)AS@%ExKVKCjef#u
z75N<jIWHG`w(P!N((Uuxn@-JE^oo8lt~-|~37E@t#8$1;O1<D=c6?!0ebi3rVWzl{
zo`kLtqG%hdL<f&c)F-x+!hTp=Etb0Yvz<M9LLBK>e2V5?_}0{{Z)F8)d&`W5Ez_UZ
zDIw}y&v;3NG%HuOcq4(o><J>virzby$KqrpyQNMKSW_bUZ7ipI!+eH(8A&c^H;Z`9
z6XGK$0thX;1uwbznHJ_Qj#c<x9Kdq`^||&TB@cR@TE241_;I_b_Z0!ON2dXS?KoD|
z@{toG2KZ7a>X#X=0X9#$|D`xMDiRDbhOXDaap7_hv&BkU_UUS%N|}bp+t(}<fUAkx
zxg8;-q@>_w+cs&I$1QgA8eKPo_Krm;Ax!9GgzAN@dCsYVQXo1l0T=ApYs;rmy0hq<
zf-94kXhcfcB?Stn#g1-8g*|siyxWY<osEHyrRg_GOZ$a}!Sm(3cBW+5tQ(mGKF^od
zCosn-2y?$#+P1aaQX(-F1Jkpna0N=fyhph|<iQPlPPY#-t9{ZvFD`BP<%mI@<;5LV
zwIzQR@$3kb=;bp47n>sJ*5=1|PWvvrBt_r;2<uT0(4RA5yK;NRG1|S16#s3)KwqjO
zf&%+ppLknpkm>yfW>N;(@S&bco|T96@yj-~J^SwGwR}@4>~ifX*|GZ9lGh*fy0x|U
z<?hz<+4_N2jhvY91qvo~gd^MA*A_ikA4r`n4^MfN$Z6h6HM42ERGBoXyMon76*GW1
zxqBr;8$J+GFic{|KTHk}@u~(Me_M53HHAAH(d_-BvErowGPi>W=1fPq-M?LK*_%u{
zy41APmfG;yyLqWSB)&)zUmJbOPFhf)W$tpLZ5kOCdl{Yjz6N*Me|6a=Kvsk!8(B%<
zZz(i9_Bo#PdAp{i&AjIMNrb@2#;=V@0IZy9=SEl!AuWxIEXJdU2i?c;#WyGX!!P#Q
z2{gZ~8j27riO_Y6a4_k~ZyYHv@LseC*L53fd^FzrT=G11l!fK;4aMGL`mw(BO>Hq!
zUj3pzu|6sGw{IvSXB5OQU%q_BZMw5q&2V}bj<^FR%LS|Kj&=Z=X29c=dkV%X1tuVC
z)ue^3-%w8h4H@<^KS#*N7YGq@krY_$5!%YJ*jmC+Rua|0S|S6Xcls-gH20N`{!*IX
zjQ67=v`pE;Fp=r{Jss%Bw<MjzR+PDE0@Yg(-_b*Kc4G6a6#28g(~(!&nBJLEy_j?^
z2s>g`Ri~o$5uc@LIXM4pHwuOP#_gY{=)1G6Hyl=esnKor^ohz-u;ontMtHN7Yd%^x
zASUB&bmoS^@!pEeuA*`3qXLWzYUWlm5i7Bxo$&_ael0Ode_<qwaXDPKzji$HT*NRU
z!%xB5eo9m4f=OT?h+v13tvJSH0w{q1F0k{x%mgZG1u!*n@$#a2tLH$_$E#l_EW#s4
z;qsQw%CAj>lN{@3iI>0Kkw7nZ%U`fjkE^eG&f}<``TJByztx2=y2uzqZ<+^6LT_mu
zK3d^l^*$-<WZB2sEFN+KO(EpnZ0ObZS^#T*L0C{*thB5Xi>+uMAR4EBVNM8xJx`<9
zz=C)5FCKm=j-)r@&qj>&OAqlU{n*aFjrP*{kgBjQ%Yj{<6K-^g_!#DoXuir^seW&^
z^281O1da`+5VL_o%Y&G^pkgh1WYu?dfCCv2H8lUBezXmQq0=|blPz=z6ciNhTJgoe
zFgB*1tbCBenf?NzJ;OQ4=>yh@uH)H9swsr~W8b}d_bQfmtu@-YS7Bg6^(D}U9C(yu
z>}fk&6Mr#AGeZ##PX6A8Tvkd)k=_c&Led|8{l*)kXWC&dgB95m_=uk9@-_n9@uAt2
zT-dnohnWOQ?eLk0@F-mcyVYXvZfuB^N*cupB=Mks#MbaNT(*FsnS+UNMbi&NqiMO|
zCYa)DnbrG9Aeau_J0bORz5;$6hKrSCs%G&}^^?bb82>9Ulqw>jWPJFUvTwpf(<bFd
zb$dQR#JbVt5!2c&Yy&Y2w_fSHN~YvQFM+P+rOW52!&D2zC3u)4^0<dSCqDl)dgf&R
z*w~n}f_<(lPCj=O(CBZ>81GCoNwueHa?#P!PRdDdyoT7=4c((e;ZJrCr8{YWI9`ex
z%Hn}CIb~GrMI_q_!gWKX2o3x<rAzNVs*}d0zuZt~<0e1h{V>fCw;pb&w^2PpCXi43
z{)v>``&ZGoaz=)}x>K@N-Fe5yQ>gdlG@7pb4N-XM@O0QWLEPEynjMY|{zi-0Z`tl2
z@!2FSabXl~e$rD5jE{&F(5GC57QT!`4OBap=qCL7%o-8q-K!QH)@DVOH|a>}olR{1
zGLVX5i?e%($h#+^*G(v0B6p~%-Oj|4E?kyxdtGa|^|?t-fC^ex2s@P}bTapdDZdE!
z&TVfzdz<G!bwaBBZ9go5iCfgad`-63k8}*yjt})j{3XEfw_{JO51^O>T;K6!zDm>b
z&QTSBaJ;M7&8~EziX5WFCdC@861JKUP$y>s1#=@TI-p{UQ#1LUP9)nKyD?Z)rx9tU
zAE%t_vwS4ZXy$8y%iuD`zG*wpbKAs<(ll8Idt8H&MT^^f=)<(|<;`zlRxB!_$h5Fi
zR@bT;UP!KYqt*~B*0ZFNGNT4AVwn_Rm$|n#B_mADo$_NSh#e%c?So4Ke}yDT!McZT
zjJxVEUcRZg*qDW%vaVPBWbqh}uU^M22<B~6qX9zxJCWQIum~?aRMAK8O0f=4pjz;Z
zm$2bo6|)S^Y;qPM)we7{4-g16cKe>FC2TlH;f3g@m3n;6@`mW)a0knFO)a2<m8BC8
zd!p`9ridUye-4(ImrDMeci<OkZqjS=s$j~#G*L4b^drspcdpW}l2q(izOywi=zzHb
z(!Q%8g74rd=#O(At(yWFXt!XGfjVxYV9+b=DFx#=_w&3S?^2^uz~+xq)AcTmg41(n
zl`Or8_V`ZfFj%5%h=g?+x`Ro0rQYKHlK$I14*i9yfK9*c=TwKF-!BhC|59CJ7049C
zLCj!e;-TkvVSZ3vTOmAgo4Yl<NG<TVK*9^P89Z2sB$C6RmM!BvSND}~+-b~oI(1%g
zh{@E{fG9z0tDfGsE}v7d&v@>HT(VqWK3XStWo71h?pQ{_;Qebwr)8g70~*2x6bgpJ
zD;*6M|M~Syi1kR-lW>01hfk?kGb0>}<!x+ilufJ@%l&{Sw)kMv7Iy;!c=GPMxVWK2
zCq&@=QSO6N1ov@afS3Q3FQCIH87XOe>W#39xR{n`;MS$7yk;dUNd_gfLBRL7q0@3n
zA&U>*E|rI0N>XLdv+bf3vHYi1&WzWlt=_2%&@ZR({&@ZN?Uydcol3n{n4Wsimudre
z0_Eq1YvJsxMiKcygDqT2B|r5=cW!|R4`SZTm{acb#y&Y(nMdu<uY<%}6doz5i+e;R
zb%7jf<UO0Gu*%|QXWZ%tV0UE*v&3ra9k8Tz(VqM}%>qyRn9TlnKIJr8kmEjY6y;oZ
zia{Kf4yXe)Z!Um=3u*qqB_d%N0-rO#RLrF$MZYm*w+n2UP=~_4FgKy(&2K7xaf-Y}
z*XfO{h2H%7^lLH1zY|P5iJLFqDdWx-_gILX?#Y_J>4gDYWxODLby2nI(Ac+QB7X3{
z4gK!&R4_(lSgf-MD&6%dT<lu|Ktt%ecR#pQwz{&K7F(ogy)Q51^%$iAC`VM$AjD0<
zURCvVxS-uQ0gNR?4iL*1dEu~AO~qg*+e<PY#YoPa;4@0Es=6lprNa`KO;GfSY7~nc
z2Wcqege!6Y)%bow^Jyn(5$%4T5U-8zPO93N5Y9F)X7Siosi7zNtXda@(e&DZEHVwt
zE<WmriWHSIJPzR?GhjZ4%iKj)v>~8a1fvKt0P}x>=Q3r?+)}?wi%n0aA<A9~8NdvW
z(k~ao1U=XUK|p`cL@wZg+f|U@)7gKg82DO%5c6uqeq??Ez*y43fErw+GMr*Kvt&sD
zoVea`qI{ZtI$Clnudq-DjCgUQ{PYowXpyy`ic03pW)!_2Z*RRiA-$t3V1?2MVUhef
z9&yih>BqN*uU}g`Q`AYZ{l6;PU<3w$KX0N+IqBP8B*Nz8ddS?w0-S*iPip;LXOd2s
z?n^3Re<lEwv^5J4VIzkbxPhkLr+k;a+1`5}c<jl94@UQupXYHDV=7QA{H7n8F~=iK
z3rs5U2K+W>F=f6;mlnWm-xzJ95(Z`Bk2tWOPP9nxNGO^O{2GBCWe+$@OgVTIA3O-g
zXf{Z>As;b{gr`wEy*`9uIH?wB_i7+x2EZox^zI$4RZH=US{Cj$Fso)lx?UYSavbN0
zi|<X1_LOKDVE}6!n+eXjcdf9*Z|~pW&M4k!$8xU2jgs&%RuGw8BP63)x?*Nj;zecQ
zv*57T=HrwKWrU&JExRQUkQj?`@&6r{0Z5314jumL^OWwl>CV}&i>6lky|TpEsZS7C
z1o>wXh%EGN!RP_E<}<ZF>A8f2l>MC#VPPpnE)$%WE~x@epa<ZX(w^}ERaR{Z3CDwH
zn501;xO&&%vbr{Qg4Bx#x+={t)j;g#?CktyGo)Ac2*@QgnuVRk_d?A%Fb31beW$}~
z|M8O94k?|`A*JgM7r0{4K+6${;CMKP$!jdS%)_G;A$+gy??(cp9okAVj{77KU`n4(
zQBBf~&w*WEE}Sc7xM@e7eB=l?f;6q3uFHi<0fRHdJQhrP)dBvNB^y8~v*TX%*I&oW
z<*#4W=&I27sQE^A_wHS5FrR|MTYPob16+RvJU4+Gk>)Ni0ahMEufv}sV>hnAqk!r3
zvq(OIckF!jnS2?^x?9q$E0O|O_Vjz~DPmxll3JEgX=-{qAe~(SYM+`q17_qT0X9`7
z^ZH=T+ef{h-&7k?qX)u+ST@(vJpr1x7l<E$FHU)4^4X1=_wm{%q@E0y_>4*ei0T3c
zs^>lhu(Ve_A&)XiQ2l(<3(%gTt5IXD%KXouK{fVkS~;!&HV_{2anUsqzWbHh4U29o
zO3%)|)A)g}2e3wGLVzOVIU5CSK=q&yfZC`GNfqQiXKAb&ywXP0_G3i4$nt040Fx`a
zA2TVnC}<@)ej5Gk<5Yh!$49WTVh$MlE;?-k)pNHNiroi2<$O1BJV-~7LMXTPBq-8n
z=pu;j00CEo0w0u#g)toPvTBn4{C70kY;3fe$lYbUsSVqU_~eo!<AyJ;*i+`~E%!?H
zw!yE{ZLS0cfDdM3iR!`l-c+>*6<}zGxS}9iFPDnEl@8cl4`ud27r#1U*7`QWhj0k!
zOjG15{rN2L?c29q*{0P1U8J)@gYi;Y14#ECkR(iS7bQX~?+7YsF6LQ~Y2Mv7$r`_-
zG~&n5)gKu9G2d$LY3F%KB^5kMj!iALt$s>T46xn?JY;U}6@cKi7fJB7V9yjgL@c+6
zii%nY%<@Cp*$-gMiF}r%(t~ZEP|<Ksiig24{r!!L7r;qK!hJn$;)b4lkq}0Hg1hww
zy`gJOZAwatEf~L|Yhp!}M8XTt(9Vj+<U7n@ZQQ|BFIi|^3jzL%$}?1{K9FHt#lz=>
zdIH+Es_jHfApT_ASJ5+8{8DQo!GQU-SC(U8cPGCh!LE_*y)aO3R)V^uhy<N~t%y9j
z_oyQH-mXFE0zysF`#7;L2brj2fA;0WeIjKQb92afEifD?q)-Hy6?|gK^rco(pWVm(
zXj(74=K#T*y!(@Ou#Y~vH@b$0OwAMn1*$yf(VHLvISV8;d;za1dvtkMV$gptD-KEt
z@d9Ap)Saub8VL;!*e+Y?(MKWs#~Yjgg7ic-%ze+gf>Vg(_ImR@@jrd?c&4cr-4Y&>
zJl6m1_~Z&_Jz1g6`9K_iQ2o90xx4+g;9i_0=?33B;@WC$b>BUt0AMxs!ndS&soI7n
zGoJ<Ly{l7G0^{p+VVQDYtDbql32Df4Ei*gg;+1N8Ldn>v9pTJ5gwvO9GKfA}1U)K)
zyFDtyAYKx8$BAd$gjO7ghU|}yLULmbicJKqP)yCyl8o2(@09!$7#6{WLWnOKTA4UG
z6DPf3hVk=UsJ&`ew%}>G4#b<T6n$vxxq(<OT--hLI1Npnc|9e`fi5usS$9>8Y8^a3
z%b%RbQD)Y$N5zGOBG7;-FF<fEXx+;8$IDRu-ntW$*3=m?u|$LiB_CE6R-t5Q*c%N>
z@ZO*Sn45Kf@Z<aUiOmthJ;QyT8u26&U`mQ|j5fEtyu5;niu?Pk!Zz*kH&c;!za1a&
z7!AUd`<I)Uo7?>elYr(zCEI~_aLcGq1_gy(sdN@rB(C@=DYR(!#Zd}w+p|2AF{EGE
zPlv`9){?;<0|VY-9}!c%7tHe8+7JW4v*A6*s7(KxH=hDJY%c{2KS)i05%euy9rShs
zR#<F4du!)I`L7#k+F4pLQoip(Lp9IL8<lwEfD|PWn8y@gt_7EFvp^Yiry$UZ;%@U!
z`hDTp8kqd9-Z1zJQC$b20w^A<#h6)GGPAHYWv9)QdVOD^B7zYT5<)}FdhxNBC(C>{
zZEIi9Ep2WA6S4SQx}=I3NbCF$z#>jf2wqyX_4$>(@tPp|=g2lOgLgND>b=WN8GKa8
z2){s!1~3_mdHidEU?x_fLhs2>0l7{i*J^8OL=c~%J;zVsvIDE;V^b?oLcopWpr$I;
z*Vn%&-YsAIPA<8<Hmdxrz^a7>nstXd-V-h9Ep{1Hl9HiRWwDmtZm`4?wF1qj7;lNm
zLZHMH>F|-Dnsghui=<^Nh|1eNL$=02nji>Bz_mdn*PgD6i1fY)WF(UdwS_3Ev>$96
z5t~1HMJ-7XY`8}rXN)qwqX{1X=y7L`xm<01z4+B|9>ZeS*IUcpb47Asp4R;;B$QO5
zrYdop*&`7Y6DX0<qmm01_#1sg+le2HyuOjH0G{tBuPWw-cD4&Z@pVqZ3X~dcL51Aw
z=j7>_mL$O44Nb=Dp;3|e6v$hA3ru+lRQ7?AnoP)jSQK=CDapIW=mOs#V>C46>0pVW
zk`EpnH^c+jIt%x436iR(mO>v9V_?`|x93FE1lC*x1S<pg8+lM=uzWO|14tY<CO?OC
zf+{OWen1Vmt!N^8#=u<V#CA{l_85~M2z6JtH|Nvb=LeTOBv9KPq@<*r+}y5JNGPoY
zE1&L?H6VKrloL?)D!U0H4^WY|_P2XbF)$Izda+gp`D&}4wB~-hapgjtrJZh>oD$6`
zR>y52;7%Wt(lk&;WhWH^yj=^LfB3!@W08`NZbcE<<L0qPMHrKPmz#w*7OJV-z*tH#
z_y9=P=N{jyyzKn_x>a|S;Wb7uLBM|%%)7L!d3MaA$H=*s4kh5=VZGY2+1uhb-c)P{
z%DfTCXZe7&>>YGGTsNQ{_^{nI6}fcCYS18GryX;G|91HS<i(rGO6j)p=@j2Vs?0QD
z#8}aroe3g6)KU-F#G;5fY$YwIu~U*_L1ptIQrIy2cKDTVx)?pFYy@JdY-5_sv(#wp
z$B$&Chzh9}I9H7HuMsK~izlwUe0#%=qtF`+n(QA`$^}499N^mLeIfRCH*^3RTk-sm
z6LI=3>_UvE$~*kQm{@H&&y{@iOapS`W3xEs6U`a9U{}gVYrr8!j%D>2xC9M)&G1m1
z<gf5_bgDC0KLrE3q`?y1i+>Iar-gxc*@4Q(z8h0Xdb!rG5z^5EEvTn(nB@?twCm?{
zuYL>~!2`<^O9_=l;L33?_Dmd`(=>q=@1o6a4MaiIjc?=|NvG=XC_|2Uwu&JQ*M=vK
zybS||dQMKx`?yb=SvwnilEAn!Ncj}H#MS`VV!G)e0^Zl9ZJYsy-6_`7plACET0}k9
zDv4P&fH9=`)yv;HXW9cQ{-E-SBAvxzON0kKWFafn9xor17e)+4(s@v*9<fn!KpbOh
zXi`~&ml4L5cY%~UFAl7q=^-yTf}5`*a#Byc?fgC&Q<3hI6Yvtf&=gH%S6v9Ea*B?G
z^We_vfC4)2^E_e=^rpj_)oNjytvvS4ImJjC^I0IU2Z3+i9Cw@R?=9*<Wg}LDStK#N
z^n5x1yvMPT{k!ffnD}@_=Vv_A7T61XWlXO{-)suhp8xccsw{|xCfgf5+h3#(UcbjC
zDQX<+U}aU2=g_AGrd5jP1ZdvRqgWQQN5_I>M`S*m<Et9vw5<We5STa^&vPJ@D!fM>
z_dEH8b)CoMK-cpfcm}CxcywsG^1w+}pM8Ri$Lc_}A~7WgG$riHf8LdU{EmXjDoTyw
z7Op|PT{Ld`h6VLA0=QZzXF-m3+SbdETa#4NyFu45FBE312xlCmb@%Zix;F(cN*X%^
ziVXu-MR%yr;qREn5sGO7m!XjxP;>%_WPjs>?1{=61n^tOeuqNW6=LpC?f&U1(9xo#
zEE_POde;wYQ2)-ROaP4u9RaN7L~{K}n;%C(7`!{F_wT_4^ZRsh@Mp(P{;~M~=-xqy
z2a#CnNA6{Dz@OBR993>qt?s{DyPsy~pZ{U)rS(P3&HV2)oBDrW>yLQQL2CX#-yu0<
zp`ImEh03tKIo!&UVw_CA8%uga&`U>8E|_Y*yNC^>zhlq`;7>p-`8PaUS-*c`$pPBq
zPsQeU8(D#R>CyjAcoVv3t`S0{Fnb9vNF?OqS149?6IZ7I05yqkglX+VO)`YPgHVdy
zl-oqk9$p;sdhTQ8+VJSEP-S9;z$4nqM!6q+DTDl_N=rvR-J-4Ko_DygS6iBve79Ix
z@W)5||4R-2c&9%kiBKA_FJTO5W=*4T8{DnpK^6uS>a0?T&U%ksoH9sT86G3K9{1Zf
z$eqPex$RC$_@A{9g)7Z$g};l=%D=(v1ok8up5M9~o1n2`0hBE5X?QXZ(0I(7iNEtV
zJ3OurYq{30IeI4&I`x=6F5$iA9Db`J@B1P&taj~Lr&OHLu8OyTzB&3W-`nH|;q=3b
zj#Gpi1CNYHz~<5<IU3Wdcg8BW60n`_-pN)TG*!PJ_T<^$A^R!HE<pGfUMTQ~i-!OF
z-M(d^z6yN#vCdN(yXC#ToMHc((1r2D%MN|o-?z}}vqOIDq1dC;f4wSbzWJS;9sUa$
z0?0MrvARQd=vsSg1zi;+l|;Wk9s21R8(2t^>7%K;b)>)ZzJp8ewc{LoI<yAIAQF(P
zQCBq&U29KypjQR`B8<O39r}swA$Sz_WPxPJ<p0mzz>4|c|J(0L{!iTJpKAvy#=Cj&
zp&RWjIGhBs_pzv0&VRis5FW+9JOzl=|DT07yz^H{M3XVMb)V^9;Gg^r<?ET(OdkG!
DNj@(2

literal 0
HcmV?d00001

diff --git a/docs/assets/design/fused_moe_modular_kernel/fused_moe_non_batched.png b/docs/assets/design/fused_moe_modular_kernel/fused_moe_non_batched.png
new file mode 100644
index 0000000000000000000000000000000000000000..bc6cc0aaaf47bbeb828b4ee438fc660f2b11451b
GIT binary patch
literal 232056
zcmeEv2RzmL|9?rSWJ}pZWbc)-_sU)&n~Y<fkQI`>ju8qG$}TgT%qTmA$jZ*18UN4W
z9I5M$@Be;pw|jrz9^st#=lywq-sAb&@6YS)dtFxS>}mYd$BrF4D<LkdaO@Z&82GV7
zK>|v&JkJ1MTr=!M)a)!Aj7-f9k5RG;?SG<VVKM>P+EKC!Q?jt=T3Im|nCcqa=vvq^
zSQ^>^MZk4kV?#?=1sTvCQ*(1&N)}Oe1}5MVwT!O5sihsr)`XH(5cn=(X=i8-`~`}E
z-*Sq;FE!wYnTdg!eZPjyZAw-FW)?06rpv$uF++Vb5YQU53h<v9C=xZbF*E`>0EK(C
z_Pb!KuWN3&-$$Shv{QN@8v{d|{UR2iE6}prhBmgQAWK*w@CXJ@=ml00N+to|GW0*I
z5Nv$F7fW3WSew^@A=1MdHZ|D4UroZ8o>@Y~R9!?^TvtR<UqZ}4{rlG7ZCW^p+2~rC
z$bby?MsMH%>naN?+kVds9QR8&*<nvHwt+wAP=TW99rz$&BQge?8W`HbTH9~R4g@l{
zGqw7*P#<JzX{ZldBv^~OHZ~ypZ>t%B%weO0)v*Ew{!Q!90>xiUHnddL)W8l{WhhLz
z*kRp+mWvyj8k@kM46NdQFD-Q8m0<<8Cb|Y7`~Ca(iVm;J{yf5G(*^_r+B>vphcCk`
zSvUw8nnR%mpGVkp0NniN_a4lK%>mN?Oml*)s<LJ#U|DfHQ5{wXHF?F`4j?$ZZ0#K3
zNH#P8<{I`DWMgLnG6q@dnu~lb6|@0?Ee#BzO9miU@M|4u5XcHBWCjXv8QR%7!hoX-
zwgUl0CUzF)u&cmuY#h~~_cK80Z||!*thv1}!ceF}8{hv1g(>vg{vulGew`246xo8n
zHu^Bg963yY4xyxIr)y&j@Q*#X99a)W7|=P`ueJAp-(K~vYcFO9vM>aO20Y2e&|KHf
z^!DLa0G0*Az41XkXs7-NHh?-XX#I9yp#1YY1Rgm~*nHg9H3!4GNXf!s4$O?8f$44F
zqcQX&yvPUyFj^L-uNdL52198<5PE^$c8@(kIT<s%6|ip5+hEt>4GCY@0j}@24~%5L
zy>FiZ6d&>ABZ~fXXMm3AS^zi|wA8bO{-G8H+Zr0kfJA^1m|9wap+gnW2i6W)Kaiy@
zAP@edjzd@U>rAl#48{sN!vJV?&CLzXK~O0MJk`q3#uUKHo~-!h+V!s&1no^s?F<#I
zboZcb5Abr}#zSOeWa?mO03$Le(E%dVTnJ<i+Lu499J)+QMn=$bV3`h;8}0oA%5Cjz
zKxT%AuCQ`4F$oC)SALby09j^d2$v(!TE7j)BS`~xJ0}~AnZbk*3;etNSC-Dq4Xa~v
zn4Lpgg--L&Lr$#w9Xhn`|4J?j<Bq?}Mfda@7N&na7lqBo5nS|7b18YSuBDynojo4(
zs~qS%Uh}P7{$1wrEwf<K<!0vmPnZRZFee9~SN$Mn0oUzd%z}*>#sdyB3(ljM1zh#}
zd6)(JKC?IiHGhS;+n2q7Nu*<7Mad!ruuZ8yDbfG|02Tg+1d*<}sWH^P(FfEus6-Ql
ziV0IdUl)K~urM_+fa;e=O0;ibyFWQc=y?A{(ZdBt3Jgtv+}nTbVKlKGfhKs_&jTio
z{~MAACQ*J@@-V{;f*&Y){)h$y*MkIXY;>X40USS8Aizyw3thrL-U_@e3y15_yMfkV
z3e|!A3qv&r7YxC_Z(!`VdH6Tfkoc~BHZIsI{JHj70Uji}KM{Xq(0kPZPX^p%`t5sQ
zwf9V*->dn@EG5{OkKopSTE+{Qo3j8vzzPbR+FI$_>6`pd(z9@Kz~mzgW$bViALh+)
zQ{k{#0Ivt9t-s&M=Gy1YM?mMV67<#pmcEnpP>BzRm7Wdo=^NAjZ@0#w>WR+2AN8BA
zGcYjxr)tWt`Y(<g_BTuLog)1&&~U!fm)YT(%V8>lQ_Nv1f>YMdLq*&_l!^rQN$9Jl
z{FBqqVdB~E^#3U552{m873V;$IMAXFYyr5I!_5gesrUR%hiL~09PO1IrX4tq{XDb-
zx03%Z#}r`keRb}C_4WVP-TQF0|M%?!n9Jq|+6Pea`}@Hkpw55Q!4IF2-&Q^VXR`&^
z&Ja)$=zmj3_(oX%LeAP4TA2gZ{$4w}mIgp;fOi|#uAK?A^*z5jVCMiIfLIV<{{TN?
zfZcCpXdq^2VF9lJ{51mE06{w7gT0BN4fNK7s=#NE5%6m-d<gj1!Fsm(Hl}*eI(u#I
zKgkvvJ!N4sbO5?w$MDAo^_>Rrtxw`VbEPrs=`tGv7Tq8ByE8FyhzI}}`5}IHHYV16
z)&*m52Y&d&?2Y4q=^bWp%s-!Yzzi%LbnSi{=WrbSd6P}Y*jNXyW$Oa_3l2PLKfRs+
zbo{WMebD*;roQl}=NG<EN2=`mt^dM+`Gz@tN5}tsO`e_eTYH%EC|~7KVX~i48-X#N
zf4w#WQ!;*E8~MKyC}W1r$ZvD$Z#4pHRc7d3tl#7a-;n+<^b*zmpfPmcmz|9%uy>1T
zKN=4>8l?6ax9owB;Yfz_P4nMzmVdE^z{&m{%i(}IlE2Xqjxc}@_F4Tz77(nPzl}UN
z$o{nal96H70qt!efx6)iWb9X*{ffiw_u=;obi@1n`?r3F+<(4I<c6{Z=&0Z->`^ii
zUUpc4{Rw5_|B(P5u(x1u#eP>N!k6~nZ=wG(vIJ~9+mm>J9mpgD1_DFhQSQH3<}fqC
zb=U)%hH>sgLgy%lHJr?UB9ZfB<30j1M@pPMAc;a<Pb^F<2Wrl*h##Q%w{)F<zQkZY
za9zOQ#dVZU1IN-&E;3*dk-y1|g6=f^Ax>SmB=~)i0kbRr^&$g4BfqW8{>s+D-y-mT
zwP1lu4H)Hf!*>K85-iM2Ebt)DVcLiDh@VHcu)y{=9f5#9ZMz7_$p0Bh@|~{o&zBk8
ztOri;-+~U@Cw>?>KcAfeRVBY$At<|4{6R1J{5YWjlLfyoG+>6;zh7v;X5<K#aY!V5
z?|Xx)jDL~uP4*kr^lJoV-^1}AyEFd9vg><C92eY1IxM>2@qxqq2QKn{BEj|R9^gMM
zxMXBljuKqIvCsWGPWaE4RqR|`3~(U)mat-HW?(tOar+a>D_Af8CL`y^y_gCUL%%Pt
zU|xlPzr2FY$ZyLlWy`O7Ob+(0eDB8ih0mP&@NEZd<J<Q^0qF$WrE(B0lGzthb|5RW
z|A{N8^gep)9I&$lZbiYpEZhfn)nSpv0dsvG7Flra^%IFKSTFxB7WBjXEHKXUyDSJU
zCI0;^=nr{tl|WWN7@6rH$}C{DzYPwW>*MP?<A-$YUszy&Ms?NG)#ox`1u*i9x441y
z9&GGDc7`9bn~a(DK%;;WJ>X0^B3{gP6wSlx{X{hX_xe_UnA!m+<=>}ynBU-EPxJ5@
z`E9jB!O#E<WMhDl_YbvIe-eV{{O-*XAUzVyv^y-}nT{|rxL{EFd8nNg=1u*v5`JF{
z9Px(p?`E-Kh0WmaQ|<qihzNW}4pHsbw<?7|7J8<Z0JVN+?*AFF%f!S5q*Xae>@qR2
z1L;$MD^P#Np`<DY=~jMq-T#YkwjXTfhRfoExaVQ&Jjw+CkA40`)cJ=Tm%s~zUpHJ?
zeQ%z9ZxMcng#Tsd`88XP6Uc=JH_6~wVu7>%!&qWF2$CGO=6*g0Bs}NcAx;4o<KL+0
z`#ys|?UOukG#sGw*L?^-ep7~xA&|98S8p%n8<b_i@(=)d(bxqk*@b};XwtU*%wBu>
z+`ct3EI|NQ{WjketmsGZI2Jg#V7TOle?J6E7WkWQhd~Kf@qQu}2UC-N5Gd{UHbfg5
z{120RD#3qI?y|G9{(@Z|X$He1I6pZq|Bw$B&OiS&vlRI%!;cW)`_2Eb*M9!dypape
z_uw*{8D4ORIWoijGe0>9f4M>S7plCWr2&viW-o)QzPYZgt*JgBmkwp7`c~C9bTGA3
z1M*ZcF)*>P0TrR&)%U(L0~>4hzkhu<8Mr76okpmDgw_OJmOV(>2rc_nS@*kk9Bg2J
z-luYCQQ^AT;WdSq{o!P!znwA%3LE(UBk}_syqN~W-a+OTxV+t;7g$yys3s05KL0`D
zqaz;40k>M<kA$;V_#^i%<^PXI{@rPZSbyCG{ioTj9Pp~N4KV#8*MVOOx|U|Pa8CXU
zP4qi^>0b~iha&`zm?M-&IH-;yB{;DD{_H}mM<qu2ZoPg=_V{mOdmJ!Qg&~TAiH(5;
zwl;9I{PmpgsHgs8nIC*h<u_*0|DX*h96wm1f0v7mgPHqBKpVb208UUxJoTRo?V~nu
z|2b$MB-4lLsn9R#dn)ZgX8o@kE%f4nO8cwo>ko`B{8aiY2b}G|aL3F7*A@OcHV&jv
z-d_j!Guhy_8T^_5T%-f2h64M)$i)BKNdK2<A{=lBCmc+y@K;^nVEXUtA-@PX{o<Mb
zTmT;xfBLyt^}h|kzu>@d>+A;t_#nIhf95|Iz`wrd{;vpmiQ0gmncP5s+`F(JYx_P%
z`-@$>aPP!_yxrzs;I80=ZVB6yqj09q0uRd`Ql8*i#9@yGTo}M%{P%k-;9CH{(FlHh
zwSi^-CRgzv^?R|f{K_#zdk6gh2;D#HM-JFPzJJ6I@Stx_1cKx37v8V~yy|`5w?B^I
z@9+H6qoNi9_M4a*gF#@xjko7gWnq#9S<(wa6N&?;bg?iA>Dn6pfeZk@JE-qu-=Cqb
ze{_(Al?kTW!btK!VjQB#gEX>-NfPc@{98!ULPS*GT0@6kLC08HQBGY~7{n<EKmP3-
ziu`VwzlKntA(H<r73Lq_v-pcpERaL=`(xhzNO0ol7ZaG7IKRI41Yb0`u5ozLn7(d$
zIlOMH-y4EIl$Rd4a4-ZOS~zJ#;6O}(+`n7CKSQNQE%sM9e2szq)vE`ee)!VC#oeK$
zgBN|XY%tsU=OMSFEQ((UK-h^rz=5Oy?>=-moy0E>rvoZLUp8Q}1<oA={s_YZI>6^c
zLhJ}-?}(z`*RBrrlnH+15AYb+VSna^MnLu=_&2zi+dp;)fC_XlvS16ny~xvEB3~uo
z@HtDU%=(of97x+EI=ff&hdTkEi@#=w(3e2=$0rUPb7TlzkG(_re&rpI|DRd>YX<<l
z-ge{y*})gc0Cqgdo;I<UY#9c)g9zeZ+rhuKbpcpTUuXv3z0dG8x%-QH=qyN}!hU}1
zy|ZYc&^3jw_P6iO!!ua_Q{W5FQwW2A(q6)DV6Ou#fj0=QCIbUKNc0UI`hLD`-LEHQ
z0%iNN104Gc>=y*<Lr*r^pHXPWWMC2Zwe0=h@JsN?wgrE4{LP=Hc{`K6G~x#shJF7d
zX~Uu229yurw=p2?fE?w(Cp!>ua4ZluX8=AQ%$Vudc>okzFmM2}r7>{b@*6e>%~uY+
z>#qgzU)#DYXfL_@9;1d~e~+Q>g9ZMc>>gL&hsEBiew{}kECHOKZSDw6gMpzHpr!x?
zz~{a8_HXz$X%UkxV7lyyH0ar^(AnF6(gEG+>p}@{FZnxQ>p+*v6q*~}5}I88yQcOQ
z>Dy+$s{y?TZ2{Wkes2sN0FVN^V}U~GAOI4e-vbix*&2E<BhaIL^8S5^@-I}!I1jdb
z!xXXupYI`s3^0=5o{Pge*)L)$e}yW&w|VDR&g+J6?D={!`eD0b?<)K(_WcX68=&XY
z?{8KzCVnq>>=@}W31I;x2kjZHlaG|%5nP=be2gP8c^#9PAMto724WJTfS@TxvXDZt
z0DdSW<TFMRE>2QXC`J+@MN$&MmfQH-fw5VABYPwJ+wb3RUvt~CG9D<g{v4}2=Qw_4
z@ZzonzYykiM4aOy{73yEeT?BCED?KGgOMnM{4okOg5TXE{+vUQoSG2*jaH6fURR(x
zh9~2l@oP=^-G%TJBmY`A_%TuV5yLxARf=?w(H`9gXvdO*KL5%%po2reI8F&huxhhw
zIC2pCg{e=Rj+}_SLic0B{1UOpZm<#M95D;9R)MJ?{@+yW&Ch>%D%_8Wef%7|8?v^L
zdHmWLB3E;Po3pbs4l{1SHIb83y!xaFoS+YN3Hnsl>vM{W;}mGbSKTjC$&O5|5WKwP
zmQexfubJ($*ZVjFmh|Ia=KuJjo>-*+X`Em`mmIk^;m$UW0)@oA@s4ESy3Q6dp(&h(
zkWbtc(inVqP`R+g5|U&oJkn(j&?oO+={}hK#Ck`l!C@id*@Wzu>xi$sD#17EH<Y~R
zw+D;ms!u*1SZ>8wpR3q?zOYudZc(qD(N~WA(EhVfu~G|J5g}>VB}pHOMJm%o<Au&J
zxsMVXOP$!knr?hM6}wmSn9b___@#jnmpQ$3Tsmjj5dX07hFg4v!v*8!ZGYQc?Mgo5
zmXVrCTk!~9=0$@hkA8I>(X5Q<1-mT=W;A?CuwFl8>K=@u^av36Wch+a&oX{2TDqX$
z*9k5^ey`H;kywr(9+QFB=Z<3<1!`A2ai<H1yd$PxIL{g-J$Smqm?RY#ut=qNpG~r-
zW-2xz44ncIjpT~^wi{7(!jt-idiRSC7n{ydE3R9F-{CBzc8NnxN_8Mihww1#T>xXj
z2;#*Bcil|6n=73Bkm<|%tf)L|CP9_&xu`-aFk7Ezf9^0TS9)>B!HtmDFIq}PcOh+%
zj#9Qjm4)m}dUWP8+F5`xp*SNJ%no=Hxy{rY$q(YXP*FAFzV)}aH9RxndpQ-mwdtv+
zDx8JgP4Pk>T5h~2PO)oBjnIbcuIP)FF!1~ywSxMZsh!r79^R~3H^Lfw+Xs5HT^|}A
zuo4V7j8P_uM{;opCEBVY>aiiHW@*(&#yY*Q{$j}+M#?bdtVB3B-gF_vRz-n|^nna=
zCY*`&i#;}<`XFw{dvUnI4d0L7PUK@NYYDR*ShRG$_;bfZvFHoy8xQTbeK9*+D~TnV
zsKdu)Mq46?g0H2C4!p2toOq<qh(KZAW!`If{QCHlwFTW!dxX=d0GGN|WcM7-rTE1W
zrN`1|8iK?|`Ob=v@{rGy>{P34$kFPE)W7qM)i1W0MM4UA=;g%+;$N^|Qkx=#&Y8~#
zl&$i$W3C#xr!GUT`FF*u2!9GNJ%NA$w5}Iz7Xoknt`lP3V#MbFetv#hos+L<YU7xj
z)wcXaCPp1*4aw{V3er@7c?4?T#8@}1R&lOy#!Y)b@5ceA6>+*5-s!t4m2^T0KKZ;C
zA4g={*&`w#Xl&46s9e4s@IoX$w8w1}(GSm=fH6sq=*p$cTxj)p%=NW)LptrEbK{IR
zY92FAk8I?%b&u*u5XC=G1s(9uM099Nn;Bj~HLSoVN&EV0(T@|aJ4||QUn;kyIWGvJ
zJ8|_^0Ne%<1);*SKZrynqpFf|TBMIgU2+Ps!KhuA1}-l?W3)D^7BeCoKc4A~i;|9G
zTmSNQ69e1aRRoa%$1^8uQPuY)p$<k*{v5`oH|qB;z2g&}U7Y)3V-|iRJxMmaG9&gt
z0)~3tRVnu)ypgZGOAZWr%l~P?r>h8Icp3ch@*UTgo<JI6#zf{oowT@6k@TF*YVi%X
zbaWw3QI3&l;U)G&Tn@=8;q<ZAs)H5>pp#cxFPZJ^fMPP1?(OYjK37yfe|g<;1Wk$l
z%Qc0k;^+E?S~ma|#E*#i(srC7rCH~GJ+Tv_&=ode4+rauJ&A6XstWadT(@Z9%gY}Q
zZ8ee5W|oN%+N#yvltN(?_uZW@=<#6u0NU=Xkua7yGjKXeLMkVexnaAD=shl-Zt*<d
zUQVX%NcKSpSzIF4yBv|{7+gglb%wSyKa^rGLJwV@TMJzSH#f#_lvopxL|hj#_<#;P
z9fKQ1Ix6@4uDE2^ql8$1DS4;&$SY9Ubg5}7!{I_s1YL?r3^Qa`DEc;z3tPN0L);90
z=ykSlw{l#$(a|MX`x%tl_#H4F>y5vvl{!E8RN29Dt?#Pj)cRRT3vsy8;-dk;Sh;gJ
zv&;(}+UT|ZWrjh_sN5M<)Hp7@x6{sq7Nrc%@m>Q7Ky&W*Duv}|48&u%5@M)mQ?bRb
zh}NgUiMnSGCPW7?S=a9`otAUn5|PW>%s9=GBynLKyZr*RPyD3(OQTgIDNhu95zsUH
ziy%D5Unqa2EP!sIl;s1btdl$E<>E{3E(Vr77EUN_Sm<qX=Oc@Chhi58fzJM#)v^-m
zEme}aPmSls^i>t88p*oR_P_&BgS!uO8OehZ3NSXHliyIU=5&n4Uy#b85r-NwNAI-<
zU3YJf$}AUeChO07Z+42Y<KzhPZwXL>i5nqiPY(c5105e`wcK0~{@#>*Bz)*(`T@k&
ztW_G&nCxF?bl&bWbp6g^tb<2xn}5j1J0rH@)80m%ik)m_W(;rVt{b$Soho@v-y6_&
zMu!RdG<9CM5)`Ay2doC(s=ZBZw*zfQ1|zEACbza^{Ar6R6~7!zfMFY6HxoN(XLqli
zY6VR{-o18o)9Fu$Oy`M&+S`z6M&jY3?ZGbbBmy3Tmgs{jeI{cQJcH(_OOka--;v~w
z2oywkau)P9&^ALo(+f_c#xdb(t`YL*+fd@bTEa!BFz@jjp_0>ej_;CZVW0#jA{xyK
z!;v&k0${q)P3@Lcs5g|vdCm2mn#APdM2GM*Tk{>YC}hH!3aS&90Ey#n;!|14tSFyt
zb2<F-*6pNVS&3M2Z?!82>}$&(Xkn2tu2*A}6uN%Msus8tTn4-hT4nAEUW4*nd*azA
za-UUe7jjEVhedSyMr!gSC&TgRAPS6X@-qq|baqHH6g~znEEl<ju8pKjbQF`t$zm|h
zUwV8|1?Y{MXJv?WN89sT7jRl0uqupaB*`YdN)J3xS$4#r^FyL&(Y6a<h;~;$jUM5u
zm~^dg?rx%a8^UStJ*BM;-<@7zz`0}fgI-KOw?cjD@3n&8r&($@hHs;4eot0rigY2j
zrRFx*{@NcyStepXHG6IMJQUTYPm~p?+8@8We}L)+0iZ`BXj#2!ZkSLs)hcS9NTE}6
zPc|!jT&z;sJ_|KDj(b#21@>7Fy%Is4?O|#cg$_#~{p@cJ3peJrSQe+YNY?|MfC#HC
z`#Ivz+iN7mPBSmmxZPs&$T56SZWP@nviO2pU9j;4V=a~HqTy+PA)n6ZeYQ6XP~>v&
zQCgL<<_aSe5)}@&%qjuxQa;rYefgF$pXA5$t-095j-!~)ZCPgF%i}UsXU3arA_J#e
zi&}1i7`n}X)g_G~WpZ5=be^}gRHBh*QV<0|n^c(=cp#OIkRwQ_Tb5{O#8<p>Y7^^7
zi`>@G97fy3+7ev~O-@umc-o9E+mZU+l|wP_D?{(QJzJgUKI>fE(&r!1xp3}snJgU0
zK1k61n><J1xyJ;ZU0uN>ymcS`Xr$PASNFmN!OhNV0~gAhgf4=c<FEK*p7?UJzDuTI
zeOPKePu{jPWYVxdZ26h#h?stDJ4pzKi>N{4A`P+2GP6>aHnP3tVBI}XMb%V96DFf8
zf>MUYnUnrF^_iNSZHd}4;dGkEZBu0OT(fSBDB|os=8dvsckHL$Ds{$$Tz!Bg0l8HD
zE?reA6}z1BT?vl6w;z8;mgjTbPu&hYuDlh&LJ~w3LTP8eP7#a_^9^^&04mz@PA)*a
z@zVLEBQ0n3N7=WDsAzIY&2Gb{9ZAhp40<UV7UqL_$2+KkYWqpLb-RP;CqATRCV%F>
zJ^EZ^7>7m$!J@>~>qRw^`n~3-XCIE#1rkhjXI|YrOXB@TU0Z0<gFBSRf``q#4J}=T
zuDboC?F*|FqYTY_EYCakB7wzct-dr9r3aRtJOc<0!xH{03W!_YmCr8QE;uGbs3B`I
zyX^R#5D1T}w-UjDUPQhplN@_)HSGvNX%Jn?;*?r}5o_HW4Ud*<8%&KK=Y^+c21}67
znyI`>a%>9W6_E@XO^_2E=^}bAgA%7?+Mcjg|1Q(|wt3t2hWK@pjv`LKvRGV;;$_jH
zH7WJEmDygj1+VjI7b(ysVqL52$fqBIhBoulsTHX?>zdVWrn<&StkYcs^)?niHPggP
zO^)M{FF2cbQKS-tvh~vF;_$TvK0DKyP1kv2>eLZAwtV^nVULCBGaHe8e*lSiTSxqE
z3(3hKOC%%qh^HHFB?D4nL?$I;RbDS0Pp(gM-n?_-<8pjCy;54Y`|T??)}+%;nxCKR
z+kP>53y{bURj%Cn)H^wS>ZRj2@j%l<)>}N`+PtAW>8f;M!K?Jd=f%Rhh?uRHnerw`
zaf=r}6%NGn>}V2Rb;bY_6js(JpXRfYbAMIanxl{I-+6)02~#0O)=W#0ZO!aFw<$-A
zoAwF%D|SNVf+CsS#xfUzuU;uUyg)<xR7{}vjA|^!4|eg--jetpVxIiw+0N`Ue0W(@
zw~KdNc}`;R`A+XL?w&xsjQFVkgu$CLd1@#6OHMr6tX^^L7v_JZl(oBSOwvl93o=tO
zJs+aeKw1~iSF+h|6%4Ta_Shj1qNH;dG08bvpXJmsArg@x-_G}oeLT+Zmu=suZJime
z13T&yVh-O=BT6xJS(Z2(&Y<)Dj!Vy7?U&Az<T3fP<Po3&!NMJ(1<|1XE+^w1ZZ9zs
zCmi1}id^wnU)u-6Md4YDmxZQKXS=aC&fwL0N2F<Fqem)b_~Y#YT-I;(?(HWi>jdtR
zS1an$E}sZx5^%2I$V>{-8S1Qqds*BYgh&fi%gIx1R|s2qd>Y8wFN``Xf!sr`NVpR%
z-32v$paQRELxiqZeVMK@dzf)7&gaZ>X!xqWW|>ZUxv{&0uU7Y932j<zxFALB>dsD&
zT{0jqWzvb*ZC#QLh@9UQV8%HQFFc#Peupg8c5+yxGozbJI35^-h}YVZ=F~%UT9r_&
zk?Bmf^ftwD-GcK(yI?E6-n9u(C^tPpr)Zq>NjkaMlz0_^oYBjzc^1Zxt~m9Q-%snx
zGEUZaBnKCOcFywAYx$sE{W7wescG`=IhywC&HE4l+Gn89u0TO0WW_Cu>8>MC*Pb`4
z22@5|Sqtd{{>Y1{=QKuZpM=)dd;`&>v_JWcDW%_|a0rb^a!^?3Q2qzB<G!pf8%QAh
z1H#R6cViUqA@(Bm7fq5w&ViB6iN#CS4@h5Z%xY^?l}7?+Oiauv8edN^wYU2+oJ$?c
zC=om6|B0`4JO_M*u6iVn=*Bh9fZmmd8o24r@mDXtOB3}-uE)Oiuwk)VBH~hJgdTXJ
zGJaxS<iw+^^f`j3@QK5=ifEA<EG1%J+lE}<vg>8bGP~@4UV8e@urF2Hi?vGXTEQ!d
zfF`|5$~5*RIkC6N%SL2r<7w4nmGI65+DE-n2Xf32rDnBJHBOv}d_bMo22zHCibZ=S
z6=5c_)3NA$s}=Jo#>qU){FcC`flTEQ5-+Yh=uxyB_~u`p2n<OLEoK!~R!9P-;SRp&
z(+gY0j#NlN@%~8f8pWRK4%MwpbAgAsAQV#f*H6V2)D?VLvxwZVmL#(wcH26&jIBrQ
zQipdvK~iRVyTpEOxMolclS=kR5o3|+($H0^-D~%2B*MsLuNOa1<(v|MiriTYwSrqG
zAe54K0fTjMy65PjB!uz=MjmJMe=xC5Q(1ITAdUxeX=RSN`%Vq^$ri;dTfBmnlcG+J
z*Q7&GC%~^FY(&U?+U81|o)@_hf1JPHpgG5Hp$?2_s!^oHT5jEyyUSwGKiE&q?P9u)
z>kSW(HyVFY<2$RdMZ-4_nJ?%BM+evm&Y0}kY0tsk*9duRaGn&iuhE|W9#DjPfJ(&2
zPp!72#i!_?9be#LP#EuGXJ-1~;C3}j$40qPqy57dcAEFN)NQ8}3eK-vcube~vun9!
z6Vbo3NaVO!^TnMni8g=noe;t`B>O(l8|g>oRu@E;#{5=ux8#zX3A(rah%`CLb~@zz
z<C{ww=1av<*!Kt;15xd`*^nQ*8q=$ajXuEjwyFTPvk;EC_h@g>uqo6lbPyt24jwBP
z|M;Goo42W7IfD*jV23HZ`uP2#viwXQ8jBp7Tp#lSF~G2~Rab_ZQ%6D;*5^X&uakBY
zpxcx7mh7PlQ~2ZPWNs9jJ+tdp76?)Ev||Y%@w^lXpn%+y9OTH*M>b-o3JE@Kmi}0a
zP)VW}xfQtnDGKuGa{$CeJ@x@bpXa+AhFFAhJ0*qB7qclbqyk<*&Q&OHc3VNkyd4$5
zzCM+Sk%mU0jk}KEx^ZUCUWVH87|AG)Dy&ZFxN)8^dy2kbR&n$)2u(4az4bJl$(Z+Y
z+M~|*H(a^LgArV8iyH&J_;;pWJ?oImGQV{5X2HenvUVm?w-YzJ$!`j$HmEQ0n=1q0
z7)}!b5H-t%5zUe!RzMe>xW*5*+cR43xmV^-bG91Jo0xxb4cB?0pirM}bxVKm8q_Qt
zCB^8Y)xmt1P^g7edr5qLD;JN8dWV~D1zA?=B%Nr?41XW}TEvwht<db|V+*x}MblDi
zeH<3a*a)fl>nr`G!;%~AUuPhV-sz(1YIn%S#`2D(M&I59lS*Q~q%GH!DM70#PafIs
z%|3ea2cKOE^WM3Td`H>jg_Ez;#*?9|P(9WuV_9+z<5_10^5S@Nko1N2JNBO1d*%i|
zo9JU#?{cpB5#frU>wt)Sl^VD=<xn*oW<7Lx$lN8*6a+np$P}M3ae%63i)d73jhujg
ze&H-*nFhdK6FGr^fbiT(Tie0ip&n3M3P&wpo)2otZlONoFeU>W_LhhMTzn)@0P8YL
zMR7dw!0PdZHjG7GHj@B#uMpF0F&EeZKFyi-sT#o2@dd&KFKpgxh3&}olQL=*kFp~y
zjMUfb2VecPveGvBih$Gd<EOU|yF%g|hELV_VpDYvEo`Q%X5WALDHZVemO3mDzntqY
zvPCbA<gy+{k=gCdH+$*2UEFQKyYz;2eQ?)YHeR>?b^c%>h-7tcFqm(5t0QK5sf8~X
zPsbTEyL8d7&+*RHsADDb&9PU`AzjjT7&>Vd4Dbl%J(os@Dwok4z(H3V01mLs1!5n?
znLa^cz|4?)03F8^V62?m-d-CFWj9mFHeLXleR8wPLtBR=mSJnTLyr0OjGCD^iR(Jq
zr{-9Ksrk+2Nl(U#EtVQ~TpFdyH_@y@rMFjS)G1_QmfZ^&bjnd7H-`dKWF8JrXIHFl
zuIjjL<Ia^X1*vwE@H$|uOn27@^X*KmZW_FX=+^jPQmbY?Qk^~7C_*Ls=(&$H()jxN
zNU-Lv9IyCjRL={2WNo+Y75?bRAVo|RPN;XMzjZQD-UqsRg3tBkRVAlt5^K9sX%Jo<
z1HvhKRpd@Fi4dMRyg{PWFj4`88>J4I!Muy&^MU^7=mk?1l8H^Iiy=I;s#(HAue{27
zR|4_bU$rI(l@)Uo>j<+4ky&$f0k*(JopNUYGHYHP3nDZK=p~Eq6cuu=I2wIyND+@U
zx<@e11-cPr#oEiy1+`>`5M@DUaDm6$c|r+JS`diJMCHX?Ypw;nUwG_3!9u>vP1^~b
zL!{AJ2C87YnI6omca|L{lA1SHrbDmTf4*+b>$bzI?Yd$3=$cDKq~%m+s(hY_JcV==
zM#c6jL06U*L1Lg`Q>6MF@*OS-qA%LlZqJ+_aQ!m-@$wF^Kn|114-i}$d0a(El@Jq!
z#?tY_?h;10tN7mCI(wJJg%{mX-uR|<;WM31=asvS7w?{W=N`y1VCj9g=E7ZDZ+F=X
zB!qI_TKs;hchA(jucnp}fOg&7xlUcxxsrP6x$Y~^IrOr#gndZME=?t_<ad+>qvc{a
zM+d8P5`#wD0pE=ld)Xj1G~_k_^$bPb10cn0%I@;WKdf_O=gXXYiu45%&4pQ;l!`~Z
zj#$8Q;sIuxe3E!5f@yowi=9Em{t|n1Z*&qQ#n_3pVHpi<MGJ8~gXTx?7h@LiQdv#g
zDU>ob*d3m2ER7?dLNInvm};$_05EiHkjk20p*CxadA6q`+u^LQw~y$`)Vr#MhaY)G
z1M$6HT8|)A)U7$jo=UkK+Hhxc^6AyvAYRk<6d8g~z4wgiW$IP4ZoF81o1R^cuSd7c
zH@x$aC(O#$+t?&2sPJWbnD4+9x7}?{7K4VHD`Wn_Z3#l&R?}VdNpkVT%f^d1%ZkIi
z66KT<keW^BR#pii%y}v(Qj`IPN+3UB{y@5i8TT$$>|KbK`<24G>Bc0FCT+-^T^&(g
zRe1!upN5>eQiS&0m9SN+A;wstxUQQI-O8bQ`$b9vf?dz-=0@X%XdRY074wju!mDID
z)p&9*D_lh{-mEW09wfMf%vFA3VkyObTX;DQ{R_gJMA7OCT=&de^4=KxHM23=Iji<`
zXEA;WqCw`4D^Rn46NpC2E^9p$CsB70C&D^U!6H5X(R3~RMHt~#r&ZyS+p`|oS}*iY
z#;}=s00sA=H0uH`(C^}7F|K~pEt7h{us)TZEm#^yDRpl7^9KSF=lQA==Vjur5m5*S
z;Co$G%+$!wL3=$?_Zl>cReop6%9%HWbwY&IdEIJy*Rg@q+x82)bBn|j#`LXsY3^=)
zo+B4!rNKF2m^+uikCKq;rJfkrC*4zY5tgP^PBXy7lDoEHFh8G)7Q<qA`jT|i&9@Ii
zIYHV17g+1aJ6@GU_7zwNt<KmChmoe{=~g4<nY1A+eR`X#J(y$Yzd{@#67RCAjw0=A
zoFM3RJ;y+zGgGs>IA|#W5Ap#udR8G;SceQthQd^OrZ>Nd99<?v+JJ9n@B(<Mvnqs4
zNbtOrZ7>F&s5cr>M#bc~!CZyRLyif6AEWR__jZ>#*`uCk`snUTqnd@9f|w%p;IsSM
z@<|a=UKHM$Mpf|mvG+9vO&AyNcdEA&sZk@!+4;7L$0}FfjTU?vMcqL_-P^SimYvjG
zR>FU#rN2m%E9zXGPg>`5Z-FVmIWn;*;o~?0@MoJYV3tdcYbsINV6>^=x<3;DSjw|1
zZjDteVg+xb*Su^GA5VQM8jm}(v+<U5u%;_hGcz7wB<7(ksCS}psIQYM5t=k>X2+R)
zpj6xNofMYEsu?(@e2Lfm;Y42{qk|e6turOqBt~k{bEz@>a+t_{CUKgma4O=@ftaj=
zxnVp?{4ZKsJubarHF|?`>Gqb)6j%3V$y~Y6+Zgr_Xp?WExzM@^;ClgB&qeEbj0!!K
zjTH=0a}GxIBeQ*U)}lA>6t~SNO1H-1K;b}fyc|Dy{S06rI*gnZ_PCdHy1NeJPO~+{
zBth~`m^_QJB-`VHry1ORALB^hn=+*<B%e-^jm7Wc$U~9Cx`PU_to6e^F+O&=2mMr*
zal8x1n}QVt5mGutVxuuy2<CS4iKlBPK8SpHHGjV|bv3rh(_1*icM1|2zU+9DKSa(s
zw2S{ZnMM?iolJX76b+k1`4sBHf(rZN>$Z+MogADZWVyzdnVt-7uMS|58C6|c+>Y!|
zepE!;n1DCReP1zEsAWZp%Kdig=fOU8zX?jGww2u2&eyV(3V0vivsLOVB+WM%-Mq~z
z5o@!pc!~!6N@$8LBHO>CeVlE>IJePc@=BN#ceqwKn_SbnT;o#M<ce0D2*pEf`cs6s
zvb;@S7~aVGrJ2NSg>BTcbaQFEE~C1Qpm@XCVcv|8vE!}ZYt|SC2+dkHikK>z&9?6h
z)Ri1)HDke7nk4%nxM{dy$g`?4S@t2?+&OypU^7>v?W_i5*W@!VU3Zxm6e3mSOQjvd
zsh2`k@9M0t@zR`QyxTvABV4#sE`?KQv3b@4-`n|mV-R;x!bplE`~7)srIhQIsQB#1
zcf<5>;#C+tRkhttc$UYWPb|9qc1m~Ye*SrGM>LjlQk@&buR3@gHuAh^L%1Zwxksr;
zO1<&H1c_>st#0FJE&W|PIS3cD-`opN9rf=Q>P*)lrjjG9pD|3Yt!G}QNh<^mz5*Ql
z*Pn}9r{@EHExoN75%FlA%$Dje<DI8w`pV8g9SL1B1g%dM3EteuX&k?;*LU_tN0!AH
z$;e|LY|gluZu4g!9$(gOSk76wwbYWPzcvRMC&ZC}P>Qud4CRaN5b=&ZODZ+1S8-l&
z48&ks+~i&4&gy$vd!pOPf9>Uvr?o0Ef49Z5$g@Jg%S4~ZJkK*O5Fe4L*43+(Xa2rj
zLFe9s&LOf`Z?I_dBW`+SG{q!9DLKK4Uc0-s6bugRcox-&wHYTJ$ID$azxw%uSu*pB
zOr=!w3N7Rn+tG$lWVF#CGO!5)uSv)>ry32XTF#@>fktm{=pZRQ?THY*->KV(!JHq4
zpfnSLq5k})dqktY-s;SIYMt`Zl_$BIxY=tsnVN-YD)$_U?^DRm_U3yI4iwprpD>kP
zb@EM>j|6r-17wCQPGozvJ8b$UmBX_hvNnO0&}_3w3h>MbOHvSPL_ic|=5boXO^fy}
zqqi}Jp=2kJ(NHb@DDXQTiz8^iw5qz8D#YG{&bNi(tP@HuLWhf9=KaR?)5AalPP8nQ
zPeGl!8W&$*eA??xL#CH!@ER+eTAutR2~D>+n|1J;cs{;$a9ZNbq%|?8S9t;oV2V^{
zIIlo>@l0|A+oSI#MH4RP192_-4tE_fFGAkexDdQqig(lzI4NchgXzSxB3tW^FoXiV
z=*S!u#7^Ttu9Fpos<cC*X!5m`1fGo1iR3&@d=xC+5HjHte_ot?cQkZjU9$Nmh0raj
z-Q`ymq&jb{y(({N;8e>_QH0pm=n%LE=+&;|uQ$(O+jvbHSR@l+u~WoZo|3O#PVc6C
zvavL$WoqaYiE^3T<r*U*j>IJ_jT!Hx=H1b?-E4o}mnPl!ws?pYK3;HcV~R!|oTe0^
zi*AhMm>_&z6nZ034&665%f={;e*7h8ZH12j2{A-@()RV>a8t*XhyKqlSV=s`Kp0Pd
zlr=40zL4)}R?}h3d%g5xh<J^M^o9m^&x(eDH=wqf-_r+u8KMN=Ql;}Er)*TCc#A8M
zx;yqXS&hjF-=;hkJ(Fj(Qbs;{>8>oxr6t;IO>~Q{Y;E6!?K^ri0Vl=7K9{rfU~lrh
zi4J0}Mj8Q}s!{oN>~Rtkebt1-GkqLc{tLF}M%u(a_Yv^!)Qq5GknIc{zu)O7jlzI+
zGa!{OgCCN_fbeP9%T0dB<-sr?okIN^{oxzBgG8Y-ML6hcoFSd5JEkW}cinf>3n9-p
zlDfvvSr0Ry5v^mF-ft`0<!o4yoj}iF)G78^6d8QckSgp8x!E0L(XhNo>%bDe>?~)N
zulttqJo5xC5O76QWzc!`;-cHom}<VCnVOT+TSmSF&Av}nh&?1Fw{O1QDk6=0^3)~R
z?m}Flcx?B81NIn=YA(&h4t-C-GaYQ5yy?x#YphAlqjL{NPQ5n&xZ;iex$v5Q{uDdr
zaQA8rMMl;80g@}`7|-s0$&hHeJ_i`T$k+9IlgrOjcJM~lhyu!vQw$cf0grT{B}0AF
z+lQQ<-Z!z+RWfr>!l>e2wpcAyUUS`Ym~QGzB~S3UCy8a;7_xvo%$GueyI$@)AHt)g
z9MXT2PP_Dctv{X^AcX{8-8H~@jacIGI)sM;D~iX!WAfTu*;?GnNX71s*<}}0N5Nsg
zvHT0=;cQf?BhvD=pOuGY#==jnzFod3C*a{Oe8O|m5^z4<PVdxB!z#0jqN&H4_(VJ)
z20ptDa@LZvPip2IboS(RTvQ{L_Qj$+JtUE9H6%#;!B3Z}cRg3#yOkHr8^LBDR9z0*
z7?ZTA&1&Gjv;1t#Z3H0sls7j{<gRgFgM>c9#<l3LzcgbSn1Actt77nUM&Y%M(J+kZ
z3G3r4z^)C{J<*wF+9;_q>ntBkhu)aQo)lD~8}>MXhl)<&=?%BX&z(Pf)<6lMp1qa2
z5G**{f1>>ICk5o%h}<@m%W5ypLZ@By&3V_g$ee4gP>q>YdaPFFXfr50>Cnbw#_{OH
z#C=03|E1N;``Q)f>{6>brDN8zwUjPzLU?8i&R|j|`4`2zC|HM=T$*3Pc%WDBF!l1`
zl~eR3_zkXsR{#yCoh{;1PYFr_Hs9sei5Fjz(8?Uk&+^PU(~Z8rLVjC|Q}okC^kYD%
z2uboqrFW=nazyrWI$aJBMoU#J(8G0}Z>z5o>Vx^WL!2Er<n{bJ6L`F6aO>O_k3F4n
z0zC<$J~p%F7^CTlk4xWf(mpZtY{OAMoO)V&h}CgMoXi<3(QbpZj^mkt(0w}1!V|$<
zytHdG{Xvu1bx5>#p9w}?vB2V4B{soCz)o%f+;n=F5@39e+)lv&z|GnO#ukxqJL@&b
z=tYx1t!}E~M#Ot}$aC}K%%o!E)IEKCZqtHiSXxPFuf5j%9-t9l>{Dk0UE1kcabil%
z-RX{Jwp_<}v@?z~(6De0Ki?dkhVTt<BnabF0-mmTtl-=OVgEbq@;T-w--I#`7K5=P
z%$)s(Ny3re@$v1xRC5gu(0?0-yq03YLk2EhKF2MFDn7vUsU_x8#h_pA0|z46ZOd@>
zlt{sT!<LPkx>&@eyWC)`;HHhSf*#zwX@*M%0t?fPW9ycq-R-M}IL>n}&b{kr@<1vi
zR|+h1!X>XFp^0V7X0~N%>Y#3<J@mTBY?Pr^jPbB**RMdU;#KiGKK)f7^7mrih~d2)
z5YTF#?Te(D9$lyio%=xkjN&xJ6kGI(ZG2T^S*6{^uGf;0dN(eFLxN$x%8SamXFNb+
z<oS$uQP<pR^0kbcsjZJ(BgA%j?j^W5eQYUn0s?<Xon~FVy>nkyXKH3HV}~$|oKfR-
z<G{N15G7C(Y$e6wI91=_sz$N2{$)bv9P>+;CQ%&f8>s5qO}BKL)=N#zMjNNeQLoOb
ze&GNFq^%qKcAg)*D;6gOxaZPZu<vJ`VjR}3H$BxMJbW3wgs-%RfVmFg&=O_)lEaEN
z^W{WGU2t9VtR~&qjpSS4`CA1u3ByKIU~yu!$qw9iBB6CxZ=zFzOV=SjsSkv4Q-xC6
zP47wLo|JM|Y!k@wLDs}ZMkB2BXGAjX852o)){~AMTEj$hDh#c*Ku!D65Zk#=?s1LS
zBmUTcnoof#fj2Eghlq*(ku96jl;6ht8qHie?Ng7(Oa>lguInrj*C}des`5xvkfXWm
zKR*)DSQnhSZXq~KlV=4XN+FS3dp?YNiu!(4k3@5*iD}Qg>%gnUv8GHMa%_sy9K)th
zOBy_{acGhP$Q$!yu+8f%HpKj<7_exRNX`=Ta^0ix>z1#A_Jbw|>1+~3u)fUO7U}9-
zCuDhL#bUcjjHI9sCw$i0R2xI?Y`v#`tHu-Os<=uTDXnfW3C7ac8iQKI$a@ws{cG7b
zWaA0)ix_Kbq+~H<;AeNpF6#&9>Y3I}_d|FVOLlA%At?&u>|{32{DwRyr}0E&mI&sb
z<`rs~w~AZZoo$R-?^tDjB;d;F#v4Q{<4Z%_z*v%bZ=HEkL)k0uwc@mZf_Jww<^5((
z5$g^fU*72LrijINvM7%&A!&g6LB@GkZ0t$j`ob61VXkR<t7O-+*kfhMotPyoC49j}
zn^W8^DK($PV+#biw9sg*dTw;CNfV8j;ojp<RHzSftSepgFMDPcmB^t$H<d_``ULP7
zE|_+6@REI$Q}9j_NZsOYwQWTy-4R9e@0?_{W7Bq|e<*S~Ln|M_)5*RzYI14p*i4Ff
zoJ9yP9jk@!=~#8eI{iXwu1AwnpBCE&QS;E=;-+<?GRp|v&zo$&QZqNmi}<-O_Y4r=
zt?E3<r@ppTaGU);`iw;?Z_gly{Kovp3e#%*01|E-f7cHlvky5FU6wXYT8E{~wt%q4
zbA{JwU8&yqWBwPFW9%F%?6`rb!Fx&N7l2WRmw}z!UKyH&<w~T<Cfvn2_^ME6tx%k3
z$5Lp~ThhlvWS~CW`U%~l{^KMoi@vRg$u{dbn&P0h!V(a2vsF@w1dC6A(#k{FeK*zm
ztY%(y8gA2byQ&*>H_9$dt7Hmi2W4h1RH!Dh8Od(SGTlMc+H^fj9LK0#dXwnWg{|mX
z+G{vi+1q$4^j<2HixI@vWfuenfk5hz|EiYPmP2RE$Qo_eMa$ClbCWCT5$N<Y3ds`C
zeE@3r&UXh2lfK3qmd|SFTes{I++s3D+TpP~=2E`Xg)fwjl%Kg4{keJr)0U>PF7Uj^
zvLLTjrq7rD$*@+ldGQ-qCqpDE$WvKviM#nrznOQwwEVfhPLgO-4dt}lI$>`GdaMSA
zXt%4-Ld)xs9M$5)InF0ucSER%1*$q<;Ezd(KV;OH3VTb?rLG@E+Q<Y!yO9e-+ckxW
zeP4?q>g4XE1eTS!+4r?5S`XjpM-Qt*Efg2h<M#-RzdYRY(Z8T4$NH)(9wgw4L&qjN
z@ml(}=df>QG@B73N0z2VG7Y5j^^OWLuwg+;Jg20GF-f}qT;M(aTO0g5$z<S^H@j^L
zQ~eQ3;{zYAv?a?Z=FPRCpS;)lq<s$jzNFuc0++iB^tAcR!1QM;)+Dq!Q?>LQ_DDc^
zu6c45*b7;^JHRaw%UHhtiifxO3^9YL4<T_fTA$fKX%YD2Dh`E)IPdjIer1e~8>OdG
zC|@LoU)A!VIMtCbqpK=+&%5(I;C+Qq$ys6|@fw5$FxPZ+>$nAWYdgKAPt_;EV}Abh
zn6>k59pargYza4tY`jRp&k*%$>EtlQY<M=P@~l$U%m>Sf)N>n0^wF!rnT0%<m3>94
zWfi=g+=U}fVX3dus4sKf842Wc)|R45%{6+9@4uCceDy(ZAqbS2OQl1iO@gUVFPm{8
zAL4@QjG#xQ0eDN1Q!e+e;VT<qQw_9GKE2~xu6PMGRp=t9L_OD5{U!39l_|sI)}r*!
zV{eT`vgHv`_xO)zZ6&RsQHXfYh42p(flq?~1!L+VmjYqXEgU3?7+y$wgap@W65~tl
zE6?$xXw$~JAS|`(`Wv23y7^JWZp6Y&?>kDwmzgY@Qaj0f4L%9j8mC+eB}^FhrU)bh
zi<kC?;G~=@K)5Zvl*U?}d@k`8b!i;WXK|u0J(ER3gfmIaq-2h0h`8oS^|CHxtv>bV
zF%bYKZb~ucl8_nT36Rh11VXl(vAEGF>p+;7%m0(>Tb{?`4YA!kOnsinEjA>og=n92
zYoYsFs`%E_CGpeLpW#Qbfi6IvqR^D!shn6W2fA^q=ZR3ZJD2^ZJTV3D%X-oRrXNxt
zP~*lGqQv0oG<w7lX^<uGTCo|_=a78zjnStoMn8XF16T6oQy-%BC3LoD+RTWltd@1?
z&$LqWB8-+t!{kmmhIW%PI*>>Z5u4llOzAb8S9SN*7ejj@n~YwG6Il^V^a8~LPuphW
zVUz>A+3ji4IaI3D4p5JpKZY(oLC348o!j#*W#>|ba{S6UjfIJnlBF@X=9?~&p^Cnw
z1mhRp0wt?BN>-2$X!Wt>mfC&(@TFx@=~iDp`e|I+%cqm7dMLT@6+FA+FhyGWPM$vb
zRHrt0H!t7V=ZrG-+bNZ5U^k5hIs<J4HHzMP0Po|ZobgzXIrVO<4CK|>d=x&X&4>?*
zPpxNzMc#(0c4{+-?NXlkboXAZJu9BZc2V{Cy!-Y<Te%kow?4%zwf7jXQ&dTlqFp*<
z{z<#ZPUG3f=x06GIn4Sb>Y^JoayR&PFByluJjdPhL`!N~!zn}iMZX2#U`x!DH6I=B
z3Pyy={n4a39N#Cm;=Afy<x$x))KB?$o98N|_0M)&vw)l&$idHAHMs2WM|g}acMfeQ
zHz^BdZpvWr2{cS0edYx_Tzz-5nyNW(m0<0QCM50k;8<$uW?IvObMG6LugJX&?c}yu
z55W+w(3E7vy7yU7f~fF$O%UG2YU*g$iz&qsAE=`xPhpWM6*;iPCcnBr$t$gzFkJPJ
z3D7VRZC^eu*6U%~IQ!vT#(=P%2d1_{eO7V8<sA8*5FfJFA7xYZ0sSS0o0fq46sk(N
zo@8W2ASCv(sBA<a5pA0GjB=#-rPh&Ji777M1Zc>L+Eg~jU)T$9GSrN&FN}ysFYYk1
z_DkJ3^OW!G*-VMk__gQ8aWECUuW{G-RuxFhysu8>*FR0DY;aq_yKYcvD+<_KH19&=
zCa&~0`XUl3u}7564PdjnFor`<M7H#1XvuPVYno0zZbb|m<ZY|Ev(>W!=4%|%+2pB!
z5idR5%_<#mH&sP~9KA-<zXJ8GOG){tWU@yHosJ(<=QH&vL6P8xmlHEA=|+*Rg2dm1
zJl1W^^}ehQ&|?fuca)tQWy)*MT=qq)cd}E<++!?Smo5w1+>=)?$6E9)Vh+3?y^0f&
z)Qm28jed3rUrc1<4lSvkRHR=#rJw*rcs#f<Z27V6V~H>)wWJ`xmT!1^WA5yk<8Bb1
zI;#8Lixd)Jn#e{eg9ec0)1ts8k$x2da8M|^Kg|Q@?oAik1j1p*X8<rsnhf5h)}69&
zO-R;}zi?A^YW^noEpZ*Gp-LbS9`Z&alqUa~aoQ7UuV8v`)u34Ly$Oi-#wNsu)pn69
zs+y&Kcc)KXD1>LpRRa{=_K8tR3FYof6+gv6-4VKB&!pzOxpEObd96;f-o{&l=Vj0d
z_=H@uD-4M2Tx2R(B#BBEm?Su9e4gElxuzTNj?**My4K(gMMRfxVq;e41@5Hy2ua*?
zsU3x8SGMv-k<YLqDJIx_a;#UdTrnSPCqMt}e2(`A#d_JPR-Yot_lfJ$U60h>;6e1)
z??va}S^D}-YD!@WLHr~mMq83mBRa|21oON4HngG<>l|VbBRitre+<*^<UDcb_N!pX
zrFwP-m9z|Mml&`4O<nxXHP)wUN#3U*Bpau3kIxknu(N&$riidTTcp~*B7__Dbh|Q-
zF6CugiC>kA`cna0DO};Xn3nYLTof!UtIw<HVQP!QQ&t}sPg!>r+cNrhzTRpyquvgs
ztzA$j%R~(w+d}QV3Z4iQPDbmf*|f7Xb0J~M402Vc=rXzAy#1<7Pn-x!X}W3n7ulBV
zD8YhXR04+Pyu3TDRbH@2o)8?9wg!|aBuQ5v$LGn7o|9KE)z+8t0pY21^Zc?Twzv0U
zFEunL>w1yLbhA84JKabg?V&FoKM&zi!k4$Gz9|V=ctw^Vq*qUrDinV~=o(Y3(PVpw
z5s1w-C8qk`g2Do4>c|ZO8a_0VYXL78(KA?Q43Gv{<JbX-IeKE;_*Jxqu%}MU_GWRU
ztU|rI?pyVZG>g81bJ4HZZ>QY7F!y*qNhh@R1vYi~$6j3dnwF$d%oAC*?{f^RWv0#(
zn%2B+LEgpCzjA6ynm%AiN1mrlT=1i=O8Id3K&1COWnt-!hdw=*kZ}FxMN@?kyo67`
z^|JTv9cPy>YMrYK;mOdbWxJ>X?&Z*Ij}w-uPLo?so+Ldfa_!+6gNQ`x@iFrpAy+&Z
zdZa38IXyo`0!NgM&=9&gzxubCW_nq4-8nHzn~w#Z0U~VnnmV`H-5<%8UNC&3F^~RA
zs-orH^(_=@q5S4Kh>Zx#q}Bzhw=pHY2`sNKj{qB2=<Eds<1%xJovC;wW1Z}8)L<)1
zGy1+}mB<n}sdZ10^MaJ#f>pBh^5F1E;f&~WM&hv$NN6Rvm(f&fnTUl#nZiO5x<Oq~
zrH>L}hXU+y{Yj%?szXl*qSc)#2Y;@J^U{TrR}GY&V>yg10#biG2m^WIdG8rOlkd)S
zdCpB2fw@Eh_D(`m^oo;;KK+2=q-iYSV{h~ci{yZ_6K!phwCNgRi%OK@1zS#^>9Vwo
z0=fdP)k5<7G8b}AE9U3a5|e^mBe^rMgiML==lUAUzDB1-3gaP2BB2Qj;Pd5vq$rVV
z`=tvdlxLZ1*!lczh4H?G6CNEc0W3aCI$PUxTTVNV#vWErv=^VkIwOIHpQ^9KPhrt+
zXqDV0ugq-D&j{?^^Iu1PizV`wn6}KnYMB#aoNe4Pq>@^##F3&BdyXz!@<jw;(clXo
zcEwa*gYKnh7R_>D+6VM#5uHQY<ZXJ+k6bZUUf&^Gct$-!4EC1eeSP<F;yu?WO-{>7
z&TXeeqpJ(x9A(SCB8xLT;(qT-<Ga)+U7Q=#3#^FW7ha?%A;x08RMvu>T3t4r6@SK~
z;^{U&V@z%$8>=5PgeRV3>y^UQ_Yy?SeIHz1m~F2Yy?u-x0w`d{v9|=%m@{XpC6zw_
z3$eN7xY(H>rV{5omuR)1@a(dwStid)xKf7RxdmZ_#t@!3tBa}CE*e5x`QkU^YmG~@
zX7qvAH!?tV=eJw|C*Sy03f(l7h`}uilXF`O^=pA6sHr1#lNMB_kC62@$FH~P@$s$!
zflJ<(4<Gl|W6)5bnnb>7|MWymN~Zhq&0@&H6>H+<YN@H#jIL;;n)qnq<tD}oKuh5w
ziG0UVWk;)+^^oFt#|P>6x1ClPucTIY3(?Lt7mNT-K|#t9%RU7Oz4}m710}|2$TCI$
zo#hG4S3VC$adBznPL0e^fW^z@)eQnd)VG_oyJGc?{9>}rE&zHFuF4ZphRc_$WecuP
z3L1u<l9MFT-+DXEdx27sIg+oU$tqAF0(ULt6wRv{8p23EDqmdU<RGn~x}r9H?<)2L
zP+LRjMb9sk0?9i(pKre`|3r^~p1X)!-!LU#mQ1+rtCiI#6OgE&Q4*l$`96Z8c_Zm&
z1nrAW^3dv!+jJkc4V)jVWAIsY@_y3BVx!wAlSd(gFoL}@Ue7G16?!y=pIVfQl913l
z-8Bt4XXNqjiJHW%()A4x79%MZiP8X@{pDueE%%8Hp$dE?#aBHbr*0w5baFge+i21%
zA;S=|TbojoV3o8*oVu6qH+H%es{BT!bjac~P%aB0G=BcjalQ1jPNX2c(xnS>L@Gmi
z$@ECCvkc4Jl^F!cZCnvrg?xn3UX$p(TF!J)NS!gO3TUKGv19%)t13?*Qvu@s<T#J%
zAO>vKKgL_eE{#YoI5mp>P9k)e<l|~=-|)zb_dJ!HrXMhq8<kPBTn#xT(QKIn@K9&o
zT~MWr_+VKnsjp0YT1+O|%rUCR6M36!@k@NiOHN=znvy9#lOAvT=~HbvuJldfQAtP=
z0jPTYQZ0B8+>GX$yij(&?8+%zsWzc-Dr>K(3v_Y3a^f_~X55b=86(kweOS11fCE3A
zl;-qI9FNje|4VBbUgHMxrnSuFY9-<)okdjW+L{gL@1WtMr6@CNO=b%rXL_bbd&|G;
z8JArgkyxOA;mx`Y>{F{=rX6vsIl)RBC8PPWog8c=7kfARe$WflFyAX3PU2JZyQZZ#
z6aqYF1ul+c*D^UYFS1wNqKm=8ejrD{UN6O1;iqzT+Gp@BO8TOD-=%nW(LT^Pqrt{R
zz&XB%afK8#*^1#(X?%h&%*lPG>(V-;dIAyXO|SSE3`bI#5*e|4Ih);U9?Lslf0t|X
zo?PB{Sx7;CHSe5|VG78a7}&ZwAjgguLAg+a{Y=DDLt!EPe4|Q;8>xp!g|`jpx=f9v
zzGiJpiZ?YRily#L85x+1!+yJz#;7T7wYsLQx|AH8;>y`O>ze;i)s_bEDBO9t0vfJ$
zWB&9|fXnSO>V3d=o>V@yB{(&28zG3J_W#)W>!7&0<qH^o2oQq1OK^9G;BLX)C3x@<
z9D+-5cLEGDxVvj`5AGHQcZc`z+<SlD_g1}CQ~bf4I_K=YyL<QUUTe)&q>cU~rtRk(
zUuv?D>r=ZlSv9_TJ*>|F?y~R!TSKC>pCW?p+qj&y9K`+cGa+;(A}V<M>N6Rm0$c7m
zbClDui41X3E~AgOl{cZVueb8OS>^k_iz!aQ?-(%ZwUYn~i}kx}44HSPHIu?fiB4@{
z0ZGlrHogvE>!w7N$zBMggdf}ppCto#O)q|5Pth!=<k2a?9!+JVfYIr_;?(3fy)#W5
zJv^v4rBPq8(;9Y9Taqk05_JyU8>O_$1UmufuR6I&j!Y1WQNDAKyK1$$5pFA8-M>Mg
z_X1M(&O88t-jjeyuL0@mygHOE=sUfuHq+0?$VUJOq(&m2uIAHU&((dnh+A5>IMa5t
z#s{<&GpUyNVu<*hf0Pho*oDlxq`b$0?o9tC7Z>@~gEKF*c(Qs3>&F`T@ra>58T;+m
zaKFJMVn_sLI$mvCBby-iJUi3Ry<zpSGat*d=biWp$^Ho0nG198lZt|HXB)CAfouG!
zo8FdnppOwQrGXxmFGy!6vzCd@J`$cmEUdH;%7tRIHb1P@@oTgxwd8+TfP(o4fE*`F
zoc=Iv*#}elY3iROsZOK>p&bUWnOv^h5tqm;_U(v;8m#C<%9gWx1@&&rv8cFV;dd^3
zpVv*YbslT(lM<tIN0}Kjl0I;zNj+T&kxawnatKV2EK6?skCG1rxNeU%&3M#5AKPF`
zZ@yvqktAamO@~I1yf6J2$GNrgN;6k!b@vr1Fm1s3au1my>9?^&K!AkCsiGZYCHjqg
zUg6^p2SzhP4pcYW!wMqLR8aJloBE&Vz0RXhI#vZUa=s)cX=atdOo`fw65z1ip_qh<
z93l%*7VNDnM-hYzqP10Y4u1c8{EEI1r&?~VD{i|SNR7-)l})B{wfsCtAHGx>A<9pv
zb(<*R<|71uK^=?1-ye?<+r1=Ux9~t9Rgs$ZFH|C85aP}%@b>hbt<>x$CnJz80KgHM
zxLg{a*v$6QQ(f~>G1~2c;%ESBY2#Ez2%e*$cGgcAy?`RS0mo^)nZF$e>mw~8FvdS`
zR+SaP1Tmb}fL@hE^I@nESN2Vyo@$W_B18LV#aWBTcH!W&&APnQ^B2Qy{fh|5J|_64
z-^L2bZLV_VSh~X^kzaky(dTn)*(%js#ekyG55;l<JxZd$b$rUzn$9FbNqHYUda#R9
z2ONa^p@*EXG?6YMMhN`Ub?XlR?KG)z+DL#RaaGlHf&t$KJUwvkzBE2PAtBKQf61la
z+3NqP$@jIyQvenop|JSOb6`KmFWRBgwYfhO2WWyWP-}1(Uf}pyL`=bb^xlY@-Q;2%
zwEs<@v`r2#rW*A6ft@^X)}&S6C0*I;f~zKG0`E0_ekG1Sd(R9h=f2}#7uvphg10Nz
ziO4!|NAiluZVruLk&^~jv5A^v+>)CCSRqZ=lF89jFlZ}PXDJIEc-digj`7JoaW10g
zTuv<iGxD=42BS2m1b1r&(2(Le)03uZ9w#|2)F-fuzIQ;L*BdgYL(E_JEg+z$@y#`I
zesjfDmrM=(BJpr`V@PtBlN|W)DsgNQW2;rUF*}6$t8qlFft-jkMaGl0oiPcIb@?av
zq}tz-l@u|VurnMNQfsGY2p32ui&}N=H)Tw*XKv4KqPm-09JxkV&FR$wAI$<K4p*ED
zv-<D8pE(=;5glvB<B1K@1z6<axL#+mC!8zSvn2ByJLhJm=~=@)VFx<;!Win5hxYK+
zdskkY*`Hju9Qcd`qCrDBmo`3s*TpMwqZw38=}!NW$=t*}<?=+Tacv<}=U!t@J)<r8
zUR!*P^FSVJmW@BZluTMF&uAoBz}-ABlFd5%g(H5FOUI@GN39^cS5+uIC%)R^%2d+>
zlf<U35SzNuozPvgt`KFaygOYWj~h$th|6U&TQzNQ6w+ilo>_h3CQUlklyaOl`>|n&
zr49f!={+w69srS<-CB8fBxUeu(X<IoPMNM-brZ=;$Hud9Vc&7955APBb;kB@g!j~v
zhM;$j252RYPJw1y06Di;C~dD-b!{!AHNq(Sq+mh;fUXH(xKMHyKjf<RkpPFK!=9^8
z+va_?2W;Z~IBd0{EVj)6bP~;{_6uJ)rWb2L3<T%XqL?K5pxp<2LkF$N$;(hWHEP$G
z)63WJ)>XpLzLaMdr=8#EKZ~m;W+Dp|(%3a&D=1}8Uh^%rR(U_S%3>^5v(f2&jj{Ho
zq>h?u^65~QsQxNAn49}S0>(eo;C@)>`@<L(25LrfyyuQAHbz`Cr2Q71)!I}Y0`u;z
zd+_a6iu)aEAOJ1-zIHWaUnHd=_0n^T%^xTDy08S)^0X^-#s3e4%n`BQ^NoZaNv4do
z|2M6L7~%UN6q0ms!^SnGo_poFtCJgx?1vlR$X3vxqcWV;UD=#MHSXa~J=A+KH6ehh
z%8u913hO5Q=MDhV{>jby0<*t)l^H%<9cT5)apT8zsKrE?x}>*G%c=oTlvD%%3__k`
zQg5xY(X>*s_BI%qJR=QRk;i4$uNv-J4#!ry)lp)kT#azqPe8_S{?5)+_mYax?;~@K
zSDj~xIrH}ke}&>IjeK*MEBBt`bI*ZY--<QRZqEA`9XEbHvo8go6;@%haEobKs4pqG
zq`r_3|A+vf`?;k&CHv6a@j!}wQe|$B&`^crQ)Nw7L+meBipv0m8~F{wol!994~Qrb
zLMZ`YMw0sIH*Vq4zqf87i1@+t#v*l4p0@ojs#)n2V0Qce4FT||Q+ATy1R_K@B>@<n
zRSPRWqhuEx49WLhhL0g7DFdc&R8~gAC)#uX8mt0S2T+x0->JMoqgCG3e$V>=BLqAg
z=7j))1l&&fUQUb58Qr)<Llx|OWUEpAn>fj#9Kc+zpG}wg{C{9B35nCl(aO<&iGBwl
z$Rg{w@5eF^LP7uXzWqhMfxUN?>Ja?M%tiYh?>oIjLcevvPQYS*{F_|RnuCK_y(+Ko
zP_K;$@<X&bf_tP-W>va;U-gQcVyTP|zSFB?V$Dkg%Jw@-FeG0OkNH)D9gtWbD2yL7
ze;+fj>jK+zoPSBf0U#4Yyo&k8RY*Vs(1q0Hh2xU{qaB_75cyW(y;~`1(+F+s@oiws
zzAJBht#QR!FqW(_mQSfQ4EIGrwP?z0t#R{U_zi4Y|877NreXZu=tTbTyUvLi_eYcZ
zvpM{YKU}}^(G2`?0Fd^iCL`-0AW>jkkrDldz+a(w6T?CJKN24fDcUN%ktEi|H$MMA
zgr|+pp2Cw{3Ag3^KwX?kkOZ#Dd+Cw8jmp*CAmGc3^>W5k(lY|>G(qYGU6p+qkDEk}
zEwEx&IzLkdFaXf^1q8B!<-ZULELgR}nCrlQsSnj?{;8VxkOT++Hdp`abtpdi)(>e=
zr~d$D`{6eN6^6kNOVu!feT*tgx+>*a0fo?EX8QD3K`5eE;?y;fr8_*CdxWaGl*8xF
zMtC~%;|yazg~jxTG={Omz-*C1v94O{ou%V=n|g#)8c1dj(+|_*AMW8l{9>RO;F=%C
z)euuxiFj+^W#V9q1u$eD3mIG^=b4V7zNSMU<-=B_X=v)9JWT^boYe_lUox)^ZaGS#
zp$$_?+MIG+qbx_h9T<fXNySHsITB+r{(-0VfVQo*-7W3XtrQ_HkLUMqapt!N=F{<Q
z)GaLk$Dz~yA)-*~J$KsnbMkKzHZ5J>=Rc?hC0HeDc#Ff(u9nbl_RV1=@s#jtP|`ez
zP_+c^(!bMawxRtP;r9ICz1ihtl8Hq0$do;a-Rh_0!te_%RLpzl?bp$S*;KAL4d{Z#
zTbbE+|7yx^4^z$m_8AF6;%E3GcOET5F6+b3d|<)=*Q(lw0G}(KlM&eY&H&{f_4jcf
zT0CHX>-R~rrhO~t7F%&-M2hDf>bv7r6zeLEqeq3ifbYJXFx*%%5|JXaUsx-n?qU%)
zxV;uUY$-}I?&E`z1NL7L9dH*(5ism_Uw%(u*dN{)7cCV3uVI&XUjvxV0SWLKrA@j}
zUiOe7R51r;(@o{iFcZ8^;u1YyYz?IRd3cp}0h$-Si7>b5SibK{#Nr@MZy2j8Jof@#
zR_Vx8a2?%})a)mpAIRJOvUyF`otRBt%X2)vshBgup?#4vYDG_703K!N*E?kYs04p_
zc2ud8bP+BUqH0r(k1D6B)p$9i7~BSw?To&vDf>#L>-sOkh!W@$<l(~~>D^>7d|1rP
zC-3;Smqpa?@9qvMKtMM+7}S7C@P?p&`9B2xrv4bfL)d=0zjOzH>ng>ue=Aqth$B^&
z6D0br-;n{%2Lgah{~`U+hci=9YRr!4G8hrJY&_dz4<95)6~o2lEw=H-C7v$$>8b?4
zx+kl0pKea5oBR~0z^~7zemHC6O<uS5XP#&6z<nP%lZ%^DAA`5Au+F#od)1T#v9ZO^
zn`69gPd5KBc|@{wPwZhT{m#D#MZ;w=L^|OT4S*5yQ_}DBCH8x{r+MR~B+{vE&1o6c
zcVxUH(=68w2C(z<ORsN$1%oIVQPM(<4x1Z}GgHxxX*w;rIWyI2dP{8FZ`->Lg>1;^
z8e{pd2{xJ+gD^vJMLVU*2!usN(OI*(kp0@`imRd*kD$Idtn$Rl<`1|SEM@lB*&v+)
zELfB|VFkA>yf!jd1n%T&+<&Be|2cZvd|Bv-tDMTvG8h=MU{puOkIC5+TWE`2FQCjb
zLYZm9!woLxr-ear(q`$4hL0CexHq*~tfU|}*8^%G<=(q)GWDXU00Agp?@h$Q6a!e7
zATutv(=|KdbUY*9dn(WCco=Uz1pxdGAjA6)W>8z0a2Pa^dP9(V0qV@h=<H{gkt7B!
zW0W|jtv=B_3DFp?f_vS18<VHIb4}_0Bt3Zs^Jnn>0GLyEfsNl45>#Q(D&#0UL`2i=
z2AQR!RATNdUp56W?Z4C1Rzw1~c7_t%Pxw4WiJ}7f=GOf}4W;j{=Z&NR3JVjXjIIQw
z*m7q_!zX~J<NW|;(4sIKh~#xwzM#(I1&YaEukB0^|C9s3YjE(9q~ipq^)yVaMYup0
z`qx^~!1tO5_tGTj2cn!&Z^>B>W(bQ*OyNv;LvMs^Ql(7s6)(B+NJEFuv(QdVM8K|}
zQj1(7Dhm|S<AerOiAUSxeC(`As*P55B7a-Rfu_<=+zG3DewuDGbx!Y?xq>Ykr#NwS
z#3|g+7jEEya@y#Y^y!pAj&M7s*XbJ9{x;;vn(jdLd%#fS8ps<zFVygp!*I@izjPSb
z2<hSCi4<%UIV`rbQ@XE;qhf;fAvdY=ok^1E03IMpfqz(B`zCCG`|tYDU@_@p3BTOs
zyY2&%8X$`<bY7|7%nop_aBWHjW|_6C{((irL6Sa2?<|mxA^E#7b*=YNLyAGO?1R0!
zY*yIzV>jH#jnI$!F-Aa{@Mnsl=YhY}Y8*enC+Q7CC;M15fTcS4Pb`oqDyN~No6^@k
zA09%Y@HyVQ9=2pELr2Zt9~9*%SLpNUcM!GcNX?aKoF6x?2f)~WL&6xPe9srt=5e|8
z)<6G4ss62RGJ~xjG_1u*TZn!hed_NZsbUsDCqw)l`u0Qyl2mM2TtT+TCNs+f*Ebl_
zeZ}x_#+eW!ZY4)-XiG1Xb`!NlrDP}TigB8KDaPokaO#-?$v`mjl7{SHiCa88S3U91
zl;TlZK}H?#pS`IBe6>M}Oe*A43}HLifnZYw{QQzo5Ndw)Q+oWpV9Gw)*e__Z|44a^
z;^hO>9W!EA9AF3C09Z`KAKiLx*ZpEpg}*)HESBkTAQ4aw-H1+g`uWw_P!+IzjpKK<
zX*=sdTmaxi(4m#cgxsWB%uZU7TYnd8Y52eWHNJ*Wy6QICM_JDl>YOGq8xZt{{v^x~
zng77*GX^R<|Iq@p{TpvvC|un)qlrI>X#5Npp>&O>!jzu+<yfS-on2yNq`=&RpL=X%
zl!2rUG`1Zm5wq@`C*p|ipKoJnczu&)DW&JFmK&Hl9^`CT<<A~NcbY={fb2P`vIQ|M
zp7uEKKwe_>?(e9>Av-)Xy`NfAtq)49K<$WlC|HKlD)J(DuGZZrP?Fij)Dyy*sw(+0
zLz>byeXdj&#{759JI!ULt~nP|IYx<b4mzqa*)Wnqqb#&a;&d_&?6RN_lYgCRMH2ZO
zK7^Y^#-GaQYCi`vUxb(BVYcdPv=u+4=?^A}0nIdVY{drWi;WE$ttAb9{M(F+YBo8t
z=$yGq?dwt<{nwL%-(NoiwM^=kr3PXsfJP?#KN=bFN6mx|Y;}TEb<78?*eVkuK~JoM
zX<0C$-hUveBikI|0PREDS~u&{NPhD#M-2MC>kJ+T-50#0P$7Y%z-)%KfyG)&r>8q7
zfVZ?(t|Tvk+JB|Q=O`lQCc_#vno{lY3C@N1@d4oKF;=3*!=`aqO06`#ydo!2BC2I9
z;LhZ!f@Dm<aNgG<<U@nsa0dWZRoORgH$XqY=>*;Z$k#T3dizIr;LrKR2+QM6yLjZm
zvji-@tc=n8Otx=-`&>nqfEa4Ta-)OUdWR3KS|Jh&G5?qLVXVFa`CY6n>X-ZdkFj{$
z9Tz+B+aoW#`#<cQ1=E)H^pyZ6{CtR6`?rJm`t=^wrZfKW^F_;?`T=6i5=`Osw(K<?
z*?3A(_vy;iwfpON=X4~5txp~oU;1KL3cM*($>p$7JEawrp)SWi#TS_%Ry<dm4F>EJ
zBp0DQci-WYrS(<29ItwaE<T>U{}@eXr^{C$f99h2bkGml6oxUC+hxcf$c;go;~l4^
z)+iH-jkJm<7B9$CkCJ5$2*wXnm|LRJD5XY(5|@<sm+F$f&=2YO#L45^m^R|wR2|Df
zogNl@O;tUn$3QipGrR)xIckwPoT$)niZFC{D2?DoP1v|L5tqgqOFE1+US^)EJOXAW
zOtp)dEQWVnHE-Hv9jT#O-~BVeM-D6JBO-ZmvLA-b^w#TQvsEACl{tO5okXYmJK}D2
z9#+&3^Ta}hMEo&;yjxk5{_IU5XQMKA{7oYA<Sp+;Z8ZVt3wM7hJYZ<z00P(H#+Lv%
zk5(JYX^^b(QXt2g=ONSACCY2rzyLUsZ8kCXz1j&Omy8aLB1K{(e0It{(a=y{PYc%6
z;eJjGp<}~VO<zJJ1kY9^5`hK&vdbUx(?52C1<eB`uu*@sd)+oHanOkX0kF3(T-A`0
z)?K71KJ++k11j=BuikbZC3_xVjH={{!7@aV`}t0_!@;5k$R@}gME~0GY75z<gq8`I
z@(TuYkN-|#`S`~DLN}x)pT-?jGQz+hyfNlS5lo2OABKi@x_mc>xkvh$E4T={l14T3
z^LUQ%M}<w(AW6uie1S^76e=xu(^}te>#ebKr)PYC$W%2;s8*-|zlN%>bdsCSW$Vdd
zaF{+RLp<|=4djPR;dOJA>^ue&Rd4`ici#Dp_81>Qb%G91<+$4yJHIAa$8wgu>}5EJ
z)eSb+TTS)YoDR|SyNmb8LcDKR-TtiAGZ@><7Mt7~^T~z)<O$szzE=8>9CHjtbqc=r
zz`J{D6n!JizJQ!h7zWfm7fEEY=1NA}LR7Lll-tRSIsq6|0nQvCr+B-kOg5R!^IH3l
zh(`OCKP~#-h*YB4?2-{{Jkalv+>MQtHD-#Ga^Pg_jLxh5crIGY<&sF}^(x)+4Dkrw
zv4Jm=Dj7a^5)cs7mNj!64Ge8s^R(0Yy<l*dtFdPmj7$)BymHJ5SsHjZEO*SvmL%QP
zZ>Iv#$)Y3PB<ZW?W=f+&PFHRf)v}dhtn|6r0&0XrfN<nrqv|iL2uSt$A-lc@HV4O#
zK$!AgKo<C-kVPMUgduQsMrsy7UB#d^Y{myqLUO;exT@w*IL~@K41+2maHkgDQ}p*_
zT4S`H(n!WI^$O?FpR>-Im1;Z1$hUe{A{!|k*?)UTEmR{wE{PtN1S_k%w=|2A9j+$F
z?}ZNryuo?zb2-MtWTyTeTcOYt3-$hri9IN$wYgfUL~zYl4^l4|)X1XA#-J=R92<l|
z+5FcHQ*!|xFP@z8gFr9<s@hyL?iS~Vr^Y}9dImR%*h<f_3sqX{c@I>Ks5lM%uD-9&
zx4r2#*S=ODlzmxP^wQ2WQkD6R_8kfsSXr18C8h4?4xh*G%5=DYUUt{W(7(_qW*x_q
z?plU!9$Wqf|H{LE82tE?EhKcqlUcWpHr0D`jm8>1IhQj7Xe2vfq-NjHRPU2e;*YqW
z_v4mNQ22DogoQMq9$iv(%S{h?cQ$9RcV^tROVwlaYI(b(%IaSdcVTG}MBCoMy`5o)
zg^T-+G&?Y;Fw_S4Tl@N;&;eg3O{N<_x-g03SN|<^liJh*Cv$e|7g9NV9j0)4bFA0o
zc1w~SS(v0o!OqjJg&xNEMu^AIA)!6C7XHo!n_+AdO7?|bv&;%i>J8u8%zVnA3JP;Y
zD})UAir4{tE61haR%kj@Gw%qce8b1wC^*SNGZpME=sN2JI174o_YYLvJK?jN-XqCO
zgr;&p($#<h9uKsUkR_(~Pv>zUS;5s%p&VO?Sq)0BOrXpcBWnXi8xVKVtEsCh_jd)>
z=qQ;=xJqb?)=vQR>+%uze^AN=Y7=I2Tf!-$WK{@w9e)M;)2I&K^UE^FBR!dS)0&RD
z1N{z+%%v?#SKh|kHLnTCydw95b8$lgKM|ZCx2W?>2kO1hZ8lpBb(*YL7tDO(3ky&k
ztdT*-q*Rk-53ct+K5%GDiJ)pYrKF6^Not)yOX7YKm|5-@`o8Vfcd@KcU)L$^ippS#
zZyN2R2A1w8=>_jlpeP(WOA7@{LRiaV`VzylKDoWu+D5dEx$wadRR5E<;f5^y%IVh_
zvwlQav=s{BqAx1GsA2FaMk+OIZ_&tJ(ema^29o|zWkz(P1jW6vwkv{(4}C!T*D`KG
z5!*`Lb;X(h!M-7$+J38p3H~8#i0mO$EmGR&)^7gF_TbC$e$jd5wri!<<x~1xdB{`n
z&ph!7IP9`q6Yc*p_3(|ntHMGm7f}B$!jJf-hO*7<Q`kK`YH%(}hh{&=aVY&xv^84Z
zzc#u(j<&?460KM2Jq`XV-zkqf+8@^KbMBFlOsn!IiP-68)Y@CMrE1UuARBOS@Juz`
z3>NFQk~p_xKJ@At4d4=15YydFWSu-bC&isDO8k!d#3GcywfV}C?x#uI>X$MdBN|7k
z<BY|mgOgM`->nwiLo`DZ+1-RLE+&#fu3?jS|JTf0yID820XCa;{u((vGM?Yimmkc%
z;Xf0!x|3?ny$8>=ZW#ghIiVey<Vc1&)!4kmepR1}&0S_+bci<SjsTlSfcFzBNFl?-
z6Du&u$z7YnL^bcu#VVT;=t`D^Dd^&&f&`w<jrS|Iv?OGR;{gg*1~i8(J;_N9g6W}j
z6alsvm)RaR&;(ecE;QIO9n1oS3qDhnMSNBdizVG`lBGNoAzD&HrUCatv*(21ZAIcG
zP#S+akxN(OGM7O$9C@}zN|zP-0+_w~ahgT%6<J6Y0NPA@-|ys-6Bon|p~WCmI5?+n
zqqDYO%590F=fQk_xJF#7y{Tg?5yzmC!)il40*hzgR9=#74G7BTknQi!duwLqkole{
znDb)fQzJ)j7dQufNtr>*vP<}75(_jZ?qnPt%lU9I)yIJz;i%mHmSx&@9m7=*xoai@
zlNYRz!=ImYI#N!9r-)|CZFbAtKUdhg*;Lu2uaPu30S-{&Q-^k=eb)6KnjCLax)bky
z8WS<6+9b!5<toQBiwe&uME_Lvd%5}9PcW&PIN9HrPjzE5OMx@N&PGXji*y%Yjaq|$
z?sf;MXbn2p2^w1}SxKJ9L`Kt}h%egpV2Hg?ui#Aq3><(y@4Aq@Wa>F6&CL%)9rRa#
z)Y&K#@&!efs2{)d(!BUSo!-~tjECO5hTn`Gn(gfHxl{kW4egUX!TnpsA_Y)!RNC#K
zcYKR}DcNEzvycc?p+<lF6rh>qsheFzIA1;xpfe-E<~K%;_7TwY`8n%x9L1xeD1-l!
z{dgPKIhp7C8IZnXSbG)Gf3jn<d$KCiYsXlHYZ@m9PVd#fr>Df>jDMkJR$8>bwidB0
z96p3d$jcJ5xS{4;sWF)bNOe<V?*Pe3H=T-i4&0}7axX&lU_1w+wQX;)vEF(<PQc^;
zQRm=A1?^Adpi-w;S=rqFlk3?lkae2`1bMU@b-L)m`yfGb5Q+xe4Ma&DdvPZJZddH0
zhbREqABRMBFsl+d90h|~z}=<Yy6W)r!gI3wKZVi?R-4hYo9Rn4cTF_zR^O!|3q$yy
z&`lmF(XG_U`+#U0@+|b|pMAQBdMcIGSMRF%jl<{ZX4wIUZVwUM>vg<?FDvk3GT9j1
z!T0nv?i;CzWsi`i(qqc|j_)~CB`rq~_7iymIgT)Rc*K3n-xZ!@WM(vGsjBLL8U~sT
zcj542l8QRX6z&L&U`(-YbwYz;CzB_^;&WpUIY(y6$5xgU9<QYNOP9Wl{V)Vx-$7+-
zGc6P*{U$Uj`Q*A;OZD8vzys|k{pmm&rECE*(9U<$1w!vgrL=ZuqDS^;FAkb^>TfWn
zs=EvZhpBJ+lM&6fgv6G6OyElZZ+~|<DLVS>RDSKS>L<pkPJho_%qbJk>5&Q=H-bYo
zzWXJ_>-B(zfI}BOWF~F816o{Yv4*ZNYK;i=JL%8P`V@P4>}_=?sn-{^Pw#FVQ(>2p
z>%9wc=hFM#3n*R@@>H13Jk(pgm|K?mgpj!!j@(e|a+O6f`vBS(aHzZGgnvY1{ClhS
z_H)i|r#c<adpZ|7%qbACm_ryV&HLRb45;;Uc<iF|Juda+vgC1DHlCVm+5h<bPN@6`
z;AO=CB2xpzvZvW+$2{8Y&U{<_N<ShrY<0{?;b*R9=xH@7Ff5et9j<fe$DU2uwRWKX
zp82;|f1i_6@fG+fmv=&|vPXd@30Gad+yuKucee(u{XfmUvVzj!S&u&u+lQbjZC;0D
zu55tyC06eFnGq5CG1squI#~PanbCXknn8tx>iee?y+->y$@X+worZUN3K_Jh6%KjM
zQA#RLffr5A_ZZaCB=&LMM~#ZT7WPAV`F_MblO2L*MUfnK4@kxHWn6|H&&Z7N=UIZ6
zsES#<(M+A?y+6}xf4S}(YU<sb%_>=45-pT+sDc!t>@xgnZ*+DETmh~Grgn`nO1y9i
zMmhhj#IU-xKt&4X7yjiyo&RA0;`uE>S>rw@Ud4tT-_Un|b_8-8c;;6bE2xG(2HvIq
zeJLnaj!gJV39fZot;D-J_Q}x{(q=Irg<JZ594OW=x7F1l+;#Zfemx+iri~tCNNn2~
z8j?|T{W*O_Q<Rk7_g-Q;^)08-Ci!si$t(a)*~^v0tDn$oDVS$$nPHI^In_g0`1j3|
zUJ0RY!&m$*5eT#7Z=u4lSpF+$lL8XV?{la|t)AC5VVvQ>|J21NR~Cb2^wTPrlirsx
z573(dAd3YYwalY|iSg?^_Q3aZH;LqKwl-Sh3-H29ho|AQxd--t%5(x92n9Xs%m-`(
zY`b0S33vI25DgF2<<HP21i#R@X3jP9PFReAo_+=`Dk)H0sT-yrjwecd0)(i3%3)x>
zDUhhMQx+eE^z0cLO6S`=90l!_0;Rg%$74E&@r$#QKRt>N<@3IY`ezrjSHIdyS<?lz
zIkCscnI}G=yqJ7n1FzX?N)LiBilAjw4J~0vH_KRPb#rj!*RwLK?i}RS7924o9`UFo
z)5!dicFBK&9lt;s6`oRg=M5aZH4_wfiRK5gE%uIAu3F&G!{~tSq@5Y}o6wF$rNg(3
ztVWPx&AzJfeb4DshqUk)pG&5r<yy>g!a2#sh#YN5P*oy7*D!A5hp1cml-@S{gz`k}
zcz(qFvA&*Wj$l&j(AfiaW0{<aX$x#`?SpP|2BX<h{}NXSg_oj}R3ofx)}4}Y;Oz7Z
zfC^%@-xDL#0~w1eKWHS+*KGFQUKnq!6u2{+)00s%=j+Ecve|7S99Ag=fNioojUl5O
zYX;@B>F>oD?G~FzIvrMn;9ZinK`4Vd+qF`fK#}RH_tR-dD0TTK)1xX;oZL(6-aRqq
zRgq5n<0*$bUU?4{0SE5Ba6(jod{wuH#Zt^q6FO1XApE*uht9*Z(`^VKMkc*})0RFn
zmuGApzY9L=mQG?KAI4WywLE`XWG{Rvp7}+`E|3G?USL-UHTTq}!V2zP_iwGYix$eW
zAP>nN$l@EZr8QusQ7!r$8p5<>G8bWP%)ihxm%(O~NWUBuMqyI-|6_8Qf;AN;5sRhn
zG1#MV&z;6v7TQd*>i01B7zY9tXBgW`=k3D2;j^1sb!M|m%c5dF9kbaEyEi^RHM#o_
zdZeq<mV7}uS+|W!3i?o;^RU{8<BzrEX+Fjr{oLRv4LP%u&!RZ~V?R0i@OtF1TOHO0
zl=e+KW7z>F7zVrY^j^@f+E5h;poOB3y9p)BjS*ns@TWo>p>l-Roh7{X^Wpr)GWe3!
zdekwd314-vxnt_MmAYgg9~7M;a~b}r-C1D)wZMDqWUOZvn`CySV4by~_e8wghMk!I
zTNV8^>rn~9OiIE(jw$>-G+PSu)uEk0x<*rM%(hACde@T0c$PeyYZ(jXHJ=q$%v>!<
zUVz6^Bl!f7(+!M>csf7G*eeTe4S~{KiUxlc$!UHMhANG9Vl)W`4*s`O3#oGnE3mS}
z@!r&P5%avyS$7129>u2YqP6;-sEhOA*=*l?20ntKYa`MQ*1>yueu(YfsV(9fiGnf}
ztjG31e%5Wfg$x#lC46>NQ0D5z>YnT|kabbYAp_j4Wf8t)a%rF6QRusA^ozp4QRZz;
z#M+x?(24>;0+`tzXpSx<E0l_7<(3*+$)L>4%GPMhUjInhcT_^rsv06klCGx~w?`*~
zF?Bhj<sIpVBm7e#iKBbi>pQt%*+gQ;DZc-L{=M9-<i?vO^E=CO(;k_2D%W^Ku#4V{
zPm9-ngX`o+9`CAa)&vC15=Su#*VG=>QZcZx#*n}8E}7PKL^qId5VpUMsoU5QO_o5E
zMRx$$tFDATP!B)fNQv_JTTI|uyBUXWNO(j)qPDfg)i_(<&=%J+qa^a2g!q6$b;xyZ
z<^dkZSJsYKYhpmjl}o$1`}X0x9Sy1n=7tMCv}{%~vDb%g=hs_yLziO+b|O>eAi2}p
z=q~+tvG{#3uhUeSon>s-R`*N$R>5k|25>?<<#E}qmmzvTN0%@V+4aU_1dSuIw0}kC
z$a+AkNfcJs^&oH~CM%MGToH2rAwMeyktds3GIjwRfUOvVPm?WbEFgfdD>>eNr<#mr
zF;3~V#8Vn`&%mP{gcEqnW4jDL7){iGr2;LjSY=o=IC`K)BLTk92=j<Pa++5fd0#Iu
zJ|kJRlU#56BVq6r#}w>dg^Jbg^r!M=@c{tQ5!T(Y_u&D0uj-L1_q~F9SF<M21(InR
zrM{T*{pP6g{8CUBep%VF`YyGv>lpO}6BUPF+KgQ9r^#oE*DiuGTCcBfYB4b@)l>&Z
zG+ar9WWO!UG2FzDeT@(Om<-<t=OZt3hsU8W-WNN$Y4RIJt!QgOq44fY&U`xSFp?1p
z2werjn<O?qY&3qJ)>~OylW3r_A<zYQ*1@5u5|-L##O1zr>bU7>+UvxcPM54kyU!E4
zvdBRq8VnSygoJAB$;n9w6^^!Z%E^g#ORGG80hTN}Hadt<LWE%L^3&64hnuD4cgqF~
z%RRTXiPy{F@phVy{c*_ig}_8NBAH>PI7hOF!z7DgA=wvFqBCKaT2~3d3jb8`9V}hP
z`LstwMF?pYD%j_bB2B=o=oncoYh9UIpcByY=X)4Y0SBHtUL@AP&kgZdZ~--&=h{&O
zPwJP#=Rx<pyZ$j*$+y+}G`c9)S)+kDkopL;KHgEH+YE}(^IZ_LXWL?dr08*U0bG_H
z0<W3=$b)8boCccv9_FxEJb&^!&k@V?03%h6xi0Pwstruq410B`%&Wvwx_Mt#{S^(-
zNc8E4m(X=53v`70@ScU}3pQTS4QikgyrF^!UTDhdY!Q56&-UIukNF}0&&H4zbM<F?
zE?H@i;@Z35{>q;zWoy171R4za=KYmL(V?`iq?nqF>G>2^k*^_4`4VC1U`!4RM(oAO
z&we+{X*2xh)`z)j-*Y(4WwN+SV%GfxnTRvyQ^1It#aMZTEy^|KAQaVdn`AmHG_cFz
z9`ZrqpMVgV@IAw<0+F~a=krS80@r7ohXMjgm+gKfJkrmk9A{#k?$0P-qx)A|Hv&|+
z=_XGUqTGULb0|xFmxCS;Yn{rHvZb3q$gRO=K?(e<*KId^UeEYmRk&#wI?;PX>^UD%
z_3OWrV#)&eEYC8BqJd!|&naGR4)UuAqCc6<))us6vOG$;<%egjuY3wF%x4~hn^6u{
zKaUWBLN(M59FuI{3&CQm+OJ0Rv%4I${5VC>#%9!1Epk}ZY-4-BbP?=wm7S)Rd3M>_
z=%7PP)8uLzS(8+1>u*u!(4XwXe_^wozFXsUXmKF;t!~Wq5p3DN@)d{=uB9@_*w1P$
zV%X0}-f<d|+~6?!x*!`y@h`9th39T|)&1rOh!_l}>I&&P8r^%H5BRqefsg4dJBehj
zU!7_jI+M-3rgP|Ak5=rkID-{f30?|?^cucG`Gr*d&g66H$~kYmbobzZoNYLapxYTM
zUW*2e{}K)S=A=v1`;}AX-Ru}EfvVQomN_TIyY4t#RXtTW6%2rE|KablX6_dkN(^Rh
z1WAha_fsT$2@#PgMkDIvwP#fV7Ynic0CYR7t!pGd&ey^t$7xH^Y5-_iWifl(a)yyp
zyc@XaU(|#_IO&&Flp^@qPICi8<2qSX&rf~V?8MmCe1r1kL$+|fHV=8jC_4S#_8f5%
z|Bbics4ASbI<3uD&RGZ1L`<6}Ja){STtj4po%H;nGlMR-cVic@>zbhG`}=xE?Pyw5
z39nyd`~#Ua77`i%@se5Ap38{$H`+!(mHs;CoM=mjjR1m0COMXAA8ds`ifB7{3VT$b
z@{8a$n9|iTs@VeLC6c^fW{yMguQ=3K5M6o!V(Rv7DqlM)Y**B}VDglD$Sj+5GuiP7
z?<O)i7CQo+g;Zc!{{!)++0&ZD5_z-%kFM49@R5h1;4uN<Kh~d4(hqlsv?`h%Dg<l+
z|G`XhRxAGk1zxX9V$?ifEKQ2lE`c-!VS!y#=Xej+I)cCQxvRdq9Wi02O;yPq`~wd3
zvm`fvkBm<3QuG<CEpmAon3LN`EjoSNiJTd(AET;+u~<@|*f>C_wIK4G%vHDUFi$Oq
zu>j>k>H&oSfx}_7Nh&1zGDS}E(h5C+V!pMm>Qke=T;1O(N{<M2T?lE<E@qOhg}r-j
zD6eSl_7(nGmM{Opgml3N^SS;w;;30s6LT3%5PPnuDJeTFy0ZI$bf41KYk(#fa+5+u
z=yo7Ou6HTbg1ct{g2@#Yhe-UyF(?Yn?U>B{c=LOftv5iX)JTyiVNK}xOL>H8k5S!b
z=k>ZI)qSaYS>OW}OJE}tIM8x<iMM#h%r*;+5xZn=y51fMY&J{84niT%|60C`rad5}
zlfaKV>oQj;>YB#y7aZ+dqp3Fd$Hlh1Fggw)>_grcI#oH$q=t~5(O~&R&H8NSgKFRw
zKQro|KgZ8{PJQ+2RIBhNE{M)*we+Y#3Hre|2fdnVyN$O>&JM@^0i%z|!@qM*3I+yZ
z_lxYhTVD-^1r#gge!z~1dHJ)1f(2v&4t&6sDPSdt4wveKd9jp-Xd|Gl{QOHMk)$n^
zh{w`-*W7%9b^`QgqkK)I)03LhezIg3r8^-<ROcBp*m$KHk+KO*H!34BahmDi{F3rS
zuo>RKGnaV5qlrE6lP#{}1NrDQ<oG_wUhMg}jZ#7UjFpb8$z+K^<HoN{w%K^9gEhWL
z+8b7$vsB&kjUGy)EQ9Dwu17kb6wL!gL^u%1Bqm592x7>#tRE}wgjATD_Hoxy1^lGM
zeW=6UJ&lH)1ULroh;n99tdngwH+b8h5r-B-xd`OE_lgXh6%bIOUHGclDiMT49*-7f
z>QUA#2&f9P6CU<+L<SrrAD9$Z!-OQ?ir$fxAcF0S=ggBuq&SlbL<sntK|8}q@@VzY
z;g*r+py-eg)V)sELru{r#{AqL4fo&py|S4MF%$n$>ey1-M1XO>NMbmO_Zpi*2M5*>
zf%-|)e|_sO+1ln&o`3it3;E^fa6JuMprhxYp;lB<g3SI|+6pg@zf~QkKa$G!o9C19
zdIi52RKE+;a!ZmtrB?P+$<26?zfesge^{TjdU=^5ApyI6x=;v#D%7<>$!u8>A~%KS
zZ(yhOP*g(~nVJ&_y*&oUd>EXX3uBZ^76^7P#Ul6}WVlv-jlO04MIK@d1ytV>-AhCq
z52IJF^fTtcjQJW%_%@xq^sZ><)n3aM=fs?H@g-p~C-ib~)|z3%Y&I1;FAcvGmE;TZ
z9MF<XZP#MS8$c*|;nQ^R8b;-)E>cs*5|ZY>uU`FfW85U4+LB7JgD`(Zmo7;JKDR)~
zmw)cxEmOI64J`4ZNj3bjj+qsWGu1ofj*jLbVV242)R52^v~S-XTjqJ7WRzi>T|3vP
zFE~qyH%a2hL+tt5RU95q>x5A+e0}8-9#%}39|~_nWw+v`?qg)JyklYOw|dBv^jw-d
zTb+}nSkDT2M{bR#VrGAr;n=1v{!h@XqjT)N1Q95DgA?DDjz`KU4516rIGyuU>>aTO
z8J9dUR-q&$ycxgdck--%kcdR|y#)ETZ(CdXzFphXmJ3JiMrtY$X@*l+j6~Xr^=@-T
zYVN660UIwI(;-A;DIJhiO3tCM8!rwGvO)}F^U{@3h4az=qu4!x1Oztj)1-1XCM9~8
zhS_e8NLq}1sRS-`8#^ZVn^>VBoR!O;wA?bu8w5ddDpm_|r?L0V4cM~gB=fxQh=psh
zi1!W|JB4p#h*x4>$jk#Cv-}U0LHMwh=PV`yZpG_nvo#3OGgk*Q*RqDm!I^feW|=4<
z6U4iLUZm+!YS3dBOWCKGmDQhr&={Q`IKsXYxF5#;KGeQv1uQ^0t_&hp2-mcyBO{06
zv;S=KH*|D1|8-bKI?>;xCD6!4q-0xAEDIUx67b|6Xh}ap{)UR0QR^|G02?Bf4YPip
z!mksi0u-UX5!MJ2ambFQbYMO&OArh}p5}M^MJqi>-R+dMf)oe^0ZNIGM{IuKe)|Z2
z6)w*f4K{&RR<%8Ojh+daPTWdn^@319_x1KYXgqV5Fffc(KNO4FNW<&BGYs-On0gJg
z)py{)nIcl{GOm$ye%_=Jts%-$SY#R+lbCN)me}AKZEWh0Aol42DTQ>Za}7aC+e_ba
zP^1PpKQDp={2|L8vcpQz{{fv!YBtIvn>I|q<4W0w_zxV^hoNA>sz_eGqEy{KU-%0k
zC)$KS1`|T*dh_+waQ>pY-Y7O=FC_8P>=qlrm+eN_pCwprC?z*4U!S)Dv9D|Tq*#YJ
zr_1Bkd)f7h<K4l)N7r>UXEg0%%ph8Fy_K)W4$&*8FEy((1O?I1pX@85Ov-<;we-ki
z8ivF&n;x4Z9tZ(H;DMMpY_6~g;Nev}eSJiU@kc_6N!xy;+baI*s&wL4kO_|8AI~Z^
z`ezCaHXxDNMMZs^YM7DljNK}kSDO%oqbiN&5rq3qLd+N?jFbJ3Ei(woHj2Eu*wU1<
zt&bcT+0{03i86WcM}7qwIpSy>-@voGwKOQ^{;!KSPI@&fis~1%0^@Sg=}pNQec_=d
z6t4K#iy+rP;c4m71Bq@y-DK6lKj&k62_2I;&A*u+M0B&8Nq(lW-OPJKFoeVfNhk<<
zYc}ee8xrbgX1|uxfGEbMaP#KQj<$m+N4u?L*2NQl@<qT-z}fuWFk;_JsEZ8C-^ez)
zLac^{{}O%0Hc+cy3k2(tfWELVP+k=8em{9{=}M5K5lSH7rRG<5y$7-++he}^k*814
z>ntM#%6CLW)kPb&6^+ocfo6SUrf~7d>s*-;uxViyzVBo6!qrO3hDcX_wkY1Mn%e{U
z{=D**(_Z)UjpGpqkf-3W7#<V7IRW2_poh8_C|b}v);S2*-j>O(W#USnmE0=3_+2;e
z_ocg2iA?%n^<NJV_0rIi=nsmE5ydwW&y6_g_b~L*gFWYKsOliWJ?j?-gQv!`>(UgB
ztIN<9?ud5e=hY@pNsJO&<ot%{_FG-U&~IlM^OYs9_TEO0ne~FNOeE<Z`M)neyQ=d>
z!}08d)bml)f5<~*rv!wtP*_I(@aF0i?QZ1iH1;BX*hRi$?$;=4_GikD!JRPrJF~KI
zL?HqQs?hjggP?3G@L2l0=e)ZRc~ptdFoJNM+OEW<QV%^B2nei^-OqbQ$PrR~fgW<p
zi;O$Z?+<0Lolm4Rk?D*#k%2~xO{lNE-cjk~P4l|`YO^Ta=|XoK&;~|*Yjp^-3s9DA
z#b!ve>s8d+qb?|8+Odc-6y4b-_6szlGuh8Ip;{7#f!AWR{+JRU@JvpU_XNrJ(f)Lx
z4+RJgtcm?u<hTXa*!%VDxtk-@Y#2Q&fRoYd!SxH%UDl6pY*o?(q!BpVZb9X*zq;BY
z+j~x-M~9Z^JfcUT=C`~8>P<;#gCaXrwBTK8{;)a-oEitE^U;nHtM;x`YtEvH*<kq^
z8JFXYc-tg3fAqRfjrklJ*>g(e<tH7)>7ewQNn_;kfVn|%xDRBb)k-IvBLEL4tc*2J
zoADltMX%dt2e|@JzsdWtAoBx8hO`WZ)|H6TO6X=$7a=)twRv=RFJ1U9!vjRI2}{Ic
zSDauL{os!$di9<VKK38@1@T`>IYKQ$s023NX%uxw%{lYhG+hamcVC*`K>_B4(7NO6
zg2V#G)Mb}O{W$426`o(a`2Y8uuy4dzRa}Y}%^sH)zrJC>wYsE@#2HQjLKz~L!7J4Y
zf})~kAjPV<f8b+(2*M~Y;K!MVwTTVmlBc1S_u_m9443{0X;Hin6F91(Poz@7P;Gn#
z_X?6hVUYTtQHp>u5z=|_;U%M{MvE?j@)%N@VO<rJGkPy(=`Y_oma#+NH|sufV~|2&
zRE3HF@G98B#e6^&;4=WDFrTn%8$fyU3>2BRzJs?6W~Q-=f<^?q9k)C#neby7<JT<w
z?w_sHbKCifgHDwSpo>83<<!2cOp0H0^6UNkXbx#6l-VayVyuqH<~3jE0m_Z9@6h^r
z`G6njP(jOPdirp-Q+j4LluaHDq)8FH4M&UIP;$h8D747C6Nw){zw3K~?b}G+2J?yz
zjL;iP-ygWtIoQCtlTQ`Ww4UuONx3luteAkuZS(hkuY_u(0_K0^yX~rt3Gnje4KwCA
z;^1v|z5eicV7)|v`vL<Bf_YnnA29miZ}%%$y;y0!*kPDh^|GwYR2^LW)O9ZV1Ewe8
zp8`eq=`TjW3%z~NVBg-DwhBH|EDV?rjj%9xQ5f`Jv*DywN69cUU39>UWCmmf0?z?V
zBUYsE?VZsFlLbRR0=9td%ea_e_GVq%{iVR&^MJu-6}Jm#690SkKk&Q?z@E$(8Og)@
z?;ziOLjl7pL(W8j@jKGIOXNSP%g{&s@3pkR!bHUQGoS%asHl=Z;ggmlzFpC(h=37D
z`llee=tN-t^S|%B(eSo%%l74ZOp!E3&2!&dW=&~b^=(>tb%0q#`v5!u{l9Yz#Y6r!
zb5<Oy?sUM+Me)qTCQE_0-(~dCz0KTbVCL-A+hYGab9q8<W4gW#<G|VkRt6zc7Wh8x
zijNx@xH~3b8V-3!G5@_bQ|4`C=t7|l8NhVc5Mr}L{sRmM{huTQ;Lf~Jz#Gp@x)}dw
z{V+cGy_xk>TFv0un=AF?(0SG60GB+Mxl;i*p9M@1^X<8m|1k}yR`|0WhE|nxF&YeD
zHWY&<ay>wNId5M!k^|PNxjfYWwjBS!yYs)f<bl!;+p-*BE(U~}4;c-pRQCZ5w2QrC
zy>Jpi&)9PJ=ZAU;uTwz}7Tr3>)xx(UV}L~X&hxGChk4&c{C}4fSaQHx)BZa;@}xKO
zRbL@uOnkEn+V1lPM+)PZS$qyqWnTog{L=j$#3@J6hu){vHMTV6HQ9*+N9H0%ZHX&K
zmI7`hU-~uT^jDmJ1xpf3#??nY73aNsO?sRmrb|t`<V5fcOpC=#I=1ssk@&R3-5seJ
z^7$eCqVrCo92aDXD<+nCsmccYRli-I+2BduD7{lz#d@ux);pd%i!qZ+!+x!adG!Ga
z1x)Cx_>OV|2AFllALC?z`TZM&$lnY)?ANeR@7uy4082KZ^6o|^YbwxFepI`<K%7UI
z>D$9yUBYss{qMhE7b@$Em}3&>t>2;-e>cU?3EBCJXYtuYJD9X3#xwcJ(%3CEw)>qU
zX8BwoP2a3nwWSoZ3nWySoA2iN(u93#o-Q{!6w+(e?F~DrS&TX+145&KhzoghAy2O!
z{I@9g<~PM}Z>4Oq{W<Q<BP7`!QE-EPP^cD3K8{b%7q#OMQ6yf;B=Ux9<N-P`@eo&n
zp%f!O%)ghhAjOlXb=epcrhc~#8QDxqFk?E0!Ohoo#vye|<#^B#fP)|8SKU9)mu5E~
zRpxsfksXRZwr%!$OvVCASkjP{PB*Q>C}QTK=#NQP_`<lOeBy<pH6F)u&0hP-$?Su^
z0+jfjzsXl0k(2J=G2W+d126<w4BN5jKnxmtf#<~}d0g}Lal7eX9)yc6CQ&P$ep(PN
zYpOLS$OvH)j)gs@B@+%Pa4d&S=jL>c`D1@LLp=d!5;@h##Q_BrM%dfx#Ql>d{vT_N
z75Vb!-EpF6W<hTjhQJ`Y_EGcQja-&irD4h6%nawf9VeYOU+F4zeuE`W&3B-3?}SfA
zXyD6{a5L3B{Q?o;96|HUE3G(wzRSJ&11%RV`*>kLVTSDx<k$hG_MP2QtpHmDEV_yU
zl%2_;9KXbeu_Y@H=&9+B(n|F*Lur2Q%3DPc=T~Z)7?L=1L<<H{7nwhddZa%s9_90s
zk12tlYh$;bMPu?OwHZBLN|K+hvj94B)a6(V2BJl-=0nMXzbeK9($wnk0?_!MdPzJU
z_sZK;FU2EtACS_%@e}MJ<j#l{d<A|lKtT%mDnUhUU*^WaIn=e)T^KNe4?v{63;XX>
z{K$Bl3j3Nk`wk?)h4%=L-i_31K}Z8&^R^+o@~Z9j6xPS9W1DlL14m0OIvu0g6qD`1
zhWmls6{nPzSYirqoAd#${ItqRwN)Se;Aq*Xz!rN>g>6m_Cw9+pE&hJwXu3kyPN(U6
z2BlJ*>RL(9a8rZ$?V9uX6Ca<)5e@#Ku+c4t<q}M@&t=OM5i9v`er5w+7CoMdB%juX
z1KM8;AALauDza4hKv}BEDO+wrx79mt)UZPoqx5gV^nI;-%u6rGJq_U@J`UQODjMUn
zp-?-K#Q-7Tg+xI9whg4A|B2Tk?^KfC!by!@Z+IHu2Mj_e<J88Lv!W$4P56krK+}@l
z_wFD3Pd**-9R4$<Vu0}9f!3Z3apgdLf$gb3eR)aZ$@kqY5B}8>k7dQ_vhR{Q4IWA&
z4=G0b4e^a4WU{fW)krs&@a$31tB=KXWuN4>R$?n&864ZZHvv%m{ER39i?Gi_j*!V=
zAgU?Em((3?6S}+BbVNuL-JC)dF6Y(VIOpnM$x!NZ8q=c7Ufh35$Nhn$xsK=4qBiis
z0OdFV2<nQ!cHm^u7WlvIf(U9G*gJXH6vJT3(0K1I2*XLX1#8HliqNXGAb7#?PIc}v
z(d9dTqHMQ1t?K)tpTUaop8vxF<gQ+Rmw-f)%t|9*vruW+@=QL*Xo0m+KMKX3_=L*r
zn%;kbJYmOgUXDK0dM3%qWJ!%t&CysFq`2AYygVD;D@)G*+Js?{I4(14mYADuW-{=G
z`<t^I&rk7i8;^I|$s*(n4gJSeKI{8jm5p^8yF;^LGdpPf+Exo>*PE_dY0UsGpKhO2
zmT6I)5@2_{q8y4lG<d*|mWsg<)Q!b#KYBli2*lJQY9JNop*NKIckBH(Y4`)*24D^O
zkNy9RbeN9n5M#+${LdvR3G`ftI-luui(up`1n#L^mz1EP93>#6?=1HJe4f_JO1!@4
zM?Cmb(fmcjTCey`Pzx@cP`IP+c-BDi<EzfY?b)9L6`<%Bmu`+AJAxvlTCeHj3iDv%
zqdGrI^(tRbBM$i`G_jETe088z?0*TlX3*&{j#g~d?R*T2#tWD|Kbg|Pl^dWl@Ck`d
zy3K3HtcAy<iW@H{kOP#*`dTf8iJPIPW<};+>9h)@X8lNc#W5V6fB?nZ;h!n=-z+5d
z$0d-V@cN<hLSs~=hydu<2PuN-ohfnitGeckB>sox6}jCJYTrO<>mgJ71SynczmG3w
z&oY;Bm{gY84z9U7Z=D84*C2ky9x)Iy!E|XX#+>DXw(HTo)A`{MmC~zMo@xb#Y=k=4
z^_c)mXsywu6zSmk_1ZVR{yLdIO3#YU3?J-r0u&K)nm0L1>C8fg2IfZI`9_BQ8Yt;<
zUOPoN^tA_bR@5Yw9OO?hEyqW-UJ-b?oljr8PbzOLI-FUBqf>;mFZUb*jwl9XFf@r9
z?*AtC{|P*JZ$XGL_QxSwl{AFcmWwvPdmDl+o5=}g*8FNeF$Y#{bXZju+fN{^fuU0l
zE0m3A)ak0;iLTf@&-dFclw*73l`K)d_HQ=o^wcRTqLNKsncDOa1i~9FFmXW`L=HgQ
zf6H&*K0xXV{r@obmSItKZ{M&Yh=?>uH%Nn0!bs;ZNDUy;Qqo8`sB}noOG`7Pbc2Bc
zA|Nm{2$Bvd-SDir{(9Z_d%yQ{yzg=N!4J%iwb$P3Jb$&=<~*=-t5GhJX&LuG$zOvR
zUQb-H{~GXKvM2e`z0a5_=VZaPN!Zjh>5%@$6jKcEdW=~9J{OrzP@IQ7ZeP#*e-yF(
zV16#F)6X6KDQIP|_%*<5y3`Ph|C<SOjM@n!ZoHRQz@-q4Tk^;Kd3T{q=HNOMcufz~
zv3_C(Ny4^T09>#&DDzwn2%WwDy@eD^d$42Jjq{QibBsv`{v>P(KTx~jItZrx`&JNF
zflFse7VzZwPmhu%JVE(?xmj?6W4=W!iBQZj97R)LSPU}Yim!>Ls{SqiNJAfkODE|~
zX-klKiRno+($*tDH9i4OlF@dJF8l9Yyk7!$(Rg;-^(q6Vy9g9bS7pCS#00MRYWg?!
zKSlc$JOy4*mxnI({!n3(z&s=lwF5;^Iyr!o^#8aTt?~EWp4$N@F!I4~x<Z60GiB5J
z3gZ26iQl5O{nVC8|5R9h*euDQiRj$1+F~Ql0zdeeaOac-6!e%*$9n5Z7sub{@B9j!
zfWITi$PX9PL)&lrmf<SzuYvmf<@-BsVgEib5>+t%=EnvH7h4^dH!o6v`wvQc1ok!}
zWmUR`sbKS?Uaj=gf2c`fQ;CPt4wGHaMQy(}UZzob?}j;8zlmGHe;rKWeNsU`*b(Ip
zaOmGq0QQ9TxOjF|SclI!oBR6sNe$5CK{Lc$0*D_b^QP)l80iNSG4g|0dDqb2z@N#%
z=)D)$z5;w{Nxb^i;|O1{#{@g@NC?_|XF}>A;~l0Owd_XAxG*+3S6^U9v;syFc=L1H
ziDpEp@ws8k4NnQgs*Ww-a*I#z&Vn^i0;yY?{4Gs{XRZt6{>IXX*!|ky`!y)<y3)_*
zeR`0jU8HWC(|KD8Gms&yo}h6Fr2+rX{UYUj``^3bfB1f+fY4GdUDR=p5Y%9%U~dhc
zt{BD?ES#Gla6NVRXxkzTX!{m>Q#81Bir-d#9&p}>V^z*KY4fOlxosZe?gDb};0;d3
z-b9O^{jOe<YrO;_a}SfT3gZM|Q(bLx_|d9A8}f_(+n`Nog-V(*Qo>^os#>x!)eth?
zQH~Gdq+}HOvLp?Oj#o!``ka@(CJnWFd!~xH<hJ^p8%SNAxuL<9JMS-Edt7b32VM_C
z#_Qk}dV!fn>McEWe~KQkhz9wUs?sa0y$R|9uB^_kF6A#Vi~;~6Oi<5+;6O8h<+3-F
zgcVCnUIG{xb$pI~;`%Eb&jk{#{|FSc`ugieCfJ!nleo?eJ;WY%o6q3gq5QneG_P^9
zj;VDrBoyV(d2ceA&oCe4@1DeH>96`RvxF}XWD9`pnBoDYgX2sk^fWS(P8?og(#8$u
zTl<W|s~kr#PrV@EdpR8jUdjg>cgE!YeI>}THt!&=Yy-<!yr`oApKXOvlezZl+$6AU
zb%Wx22{?*(fZc1V4)>RkTADCK4%Y(gRAdPJzXHHtS^KKPg=i=t(uVoWH!`9)$6=zn
zr$o0DYcqt?zw9G~DSAB$)%0R3Yj1T#^<ZPFXbYHIkkPsKhAOWU(s6=@0!8vrHJLXK
zj1?>sg0Sq4WqzcMD&xC>)+5%0cI@ozkox-i+79K1UDPx=5y2OjW=1PbE4BN^KdQes
z22+OD0BSD0j(yn?0&xBA^s#3c2=3>EWBxu)4viYv4wf?DVe~J&pe~6YCFj1o2afrp
z=ixT|>$6f|Ob%S;b^o1%Y)=yW9>C~%>k(_Cf)-zHWwyT#zCqm!%>+I+)!<vyZ1$sN
zu1CMkTCzzMPWRR)w3S!q*oEDezd=6V(BiJA87gCPVS)e*xJckrHnS%a6zrPE3d$Yw
z8)U5eU#F%F-8ZL^D^38jr_niBpIBO-s4=MSgbS1s^V<NTcJ=7(bz*qI-HUf-pYlPf
zv41|?`R-k1qZo*SMOSo^`RDMSddH~%RR4zv2%82Fqwkx%3(EA9gj>JtV5s(-Iuvx>
z;XQ`v{qU0=xNwC->Kr(O!c*dZ$bT7^OuZ+6^0hw7-fFaZKYn?5@9Lp0f3n8Dkb$f3
zY37XgM6)w!599A@k=l;4c&$nW3FiQvY<KN8YXPFG0$u@&^6Pyz)5`CA=Ak5Xv!X8*
zVq#Ld(qeVW&rCpJmQD1hyB(q^fqiCaviecd7cYoNbEe+{rEfgH#T}K@RkHMRcc&33
zw@xYh=iXLTM*_8}+NVKur|8%{S}CdT8wZ=pHeVvUJ=N*XwLG5Nm?1(7(%pNOA}9mB
zvbI`SK(`>@lO?1Pd+Ootz~1G1^K7elpvH?!ZOWtfe$#G{9hU~m^o|bSc6r%^i{^Z1
z6lICj$AvpEkY>{@g=w4r`6rR)*8<D@5QuB9wF5$lR&UUSl8`GYWl{<&)XMn0Wv0J5
z$bC@Tu=*^4Ie?T6%G4E2KinXbZ=`b>=L7uIe)Hdwy=60wPEJ-isq>uQF(Z-%Oe;Re
z-Xmh&Uj?s<!R-kigy!&doXEJFBgI;*k~j(jG9`~;^fg9uog_E!XG{B<D388IF(Gd0
z2ixD1{wCsma_Rf?{z-SLP|+DMlJ9`eauAA7x*-trSZ%U5t6y~8Mm}zfe*SBj^84Di
z9t*3b+$Q)t8x^#WLm7QuTZ`r1+aJ~5yB>U@fHpyDh{(HHA&2`)Qy1ZaCGMY|`is$t
zzXR<tE%UD4GrZ)1b*_cyGsTZ!H^ob-;YH?kldj7syq-jH`$lE@I$KTSRS`$}vc+&H
z(qhX;%$~?0%Jb|O33?)@fcG(3?R)R)813#J3u*K~isO&GOY^yj;iBr7o6nE8-%IH?
z+rEemE!f;J?6#(sPMM5KTVr;S%=!RYt)4I5(yAajjb7;qG;?H%2s-_8o4#GT)d{To
z0`co1|E~2Gc#Wkf*_)*5b#lyB3hgoybCzYmK7qY1a+8FTTL%j4q9^zB>KSg3Ke_;7
zc*JuU*y&`?Dps(#7n3E9G^i^XF?3m_=<EVs0g~TM%m`;W@=oH{H&0|%NBpnh?$uHC
znjizA`M@gJ19qikR}3fjphKM;gAEn0snGA)XW_!@!pXUE^BiGGH>KpRdegvh2Vr8V
z_=qs)Ns46${gfMMQ?MqIb>FRjM+SPJ>1G$Ti}v3s7ltntRGHff+>dkPf$2phUbsmY
zDfHHw18Gte=XJb)-~Y}{<SzJRh<hBpeZD!O5uV6fDt$j+y}r^7*5KZkf7y)}FQy{+
z>SRwE{iCAk7Q+Kw!u1I*J%LL;qRUrrO)c>$1^D!<tx}pU$PkR>Z7r%m+k_nlxs*Cn
z-UK^f`(|nncz^f^V1Vpsrk4c@AeAS<>T+{qM=PTdUMj0CiK`Iss-d_tSHZ5g<n!*{
z5mHKp_U>U9H|*K!xS6Z-tP^N5b9|Wj^_qft)1_5`r2MUZ?fM++EXAwC2S1nh?I9lX
zU+PM)&adsvQd$mme{kJAtl@@nW!9om#??Ma@*SGA@Rg6pZh~e<S8P$$nTy}jP4VA}
zs8ZSxhMFK}%c6b+R>_~L1V1$GCTe-nT@c}`@{_cnz;Pw(q=13s-r6JH(FL$;$HVd&
zJ*hL!-(lX52S9>64vC(AwKw03XS4AEpJA|xuOlO^hT&W9<49FAj~bpV>+tQ*b3i%!
zrVykGeq{!Nrdbz<3RUOEGrlm_6#>qT<~@{!--DNQ@QnABIzeCI^>9S$eeY^stz2f6
z>cyeDfhRqCinr@D0x`rNjus2C`D1~6bq{tr{!$S$<Ik{^v1<Iak3Q<O4~*35rAsPp
z2it4RWA&wDB9<SfNKV~X1+PSjwO}fw;J3?LD82H*r=4`Gh@$fI=~njw;{mQORdpT9
z;#6?!(>T*IA9U0}13iQyRw`uW25Fz~pq)xKv-aClCPX!I%j(l@ldICW-~|g>t5<RT
zOgyk(QJ#)+JBIC$bMls>YN90H?N3YU*3A(kuOZ2ly(^zSq+uo6oYnP|WP@w)=A-aV
zJ1EDm{cR`wLdcwzk=Oixf2K4AZ%z-QoOWT4rxzq`J4o7fGxgo<icg;c9=_V|{)tti
zOfkgtuM_$5QVaTj4d(5uXcfC8+_O+ZPu7)IOe}<=YzK3Lq~g9B?Hz-!Cq(Tl9Wt<+
z+W8)hIv$T<eVJ7Bbnzh>9W9g|S`HRZbZ;b-(ca4?5~6<l6WYYVi~|FP2){p2TkCxG
zn=`Y2yKBNUA0T+}f=7}~9_VMK@FLapF93p@VnWl!LR?U@i$#wF&hVeA$BFZZa!rul
zmsyp$JGVrvg~wR*kUf@Y1h%Y4g4=>E&<lXAQlBXs@w-0bSJ)quhNY0+?9zes*zTK$
z7gIs$^XG(}MobRxd4J?QuCT#FkGI9#<|KyjO(I73tN$Qa-6<0tan(5O$p8>y6Ir_-
zS5imIiQc_r!M3$o@j2Cvpj8mPwKgRmtoEkjbZd6TaEIp=9H~Ulm-{`dw0(@<l+8Yw
zFEMA~oZazcAm3%*HGZ=u65gPqdVJi?e13d3GTUp%uEK<13Y!!u&r{Sse_Eb&1Yz>Y
zoC;6T5XdaK?%sHPE#&NIe-95m==&zw=z{wkf9*)v`#Ms;FjhS*BCp+02sNpiTv-Np
z`Jp&PO%SPmah8{?@G?jFCiP-WoR7$hr90xazA+h^S}bZ)&lB^M3A-gORjQ9eEFdR0
zyj;9jYtgl@OW>vLPel>m?jG;P%+_9MMl3W({+d<Yni=M}3^PrshgTa}QS9p)-ie`M
zxu3-AFjJL&_WTHU0yOa7jGOemCWQyrR#7$Cw{Iq(5Eg}f!>;cvc0B;aN-D!K(wfZo
zrd(lx`SXrnr1pT^LbAyYydcUv2A`aZ0lF$y&G@n~@_PN~OY=&{jg??s)eR5zz$w#R
z6Y8PDvG<n(-(m*H*j2bEI(qoNtgIcL6|b@=Y7a&pT73=>-}#h9e|7M|@&y<|>?kNm
zgx<3Gc~BJZcXVNNwx)s3sX(JjJ5E&nUJ7<MM7%uwRhl~KvB@;{yUn<616N=0S>d{#
zmNeN@et;%#l=4v?%s$=n_+QqZJXGp~7Zt+xqYq;PmR43Mo#41dM$Ae0T70yk4mb9`
zQE}+cw0Y{0a;VcSzA7>8Xy<?Ez;}K#4g#H(J0Z0jDKlv^=w(E;xhSks)%g%INN|D{
zZ_EvdLb-KIu;U?OU+KF#Xl<VmL&cC^GvX<xDL-;1I?&lhi~KA;$WxB_3UW6papDHU
zDFrGt+5^PQ@gH3EEKql4NBpinI~h^6U0L8MzPh{ELq=b<b7;CkuigQKy60~Q`&Pf|
ziMyghDOlkgY(V_QvhbAmdqkyf{X2Dh`3<_(*m`@oVY{@moHB|j-0~w(05(UJru39?
zEm+O;%flhQ?tF5QX?(1>N2}cdqjrR`kWdz`0=Cem^P*g-67tJOs&onoZ)JBn!_RY)
ziHIM^^09w?bnAJx_zW(*vr^koAHY+$m2Doo6=iuZea!YPE>6VXIjt-zfQAVg%)i~=
zn3N^u44eG?!tX-Jj)c!t%rN3^tC0miv4qW&Fb^s5-QsLru7X26Ex{Na$8t*U=DQeV
zZTY&E@gLw<@gBv99MYf%YiPRnK`ctY#v$&^eL3;@)&m_?uI}W>Z0k5QnKBy6*3B)f
zup`qb%PmVTi#2b>5GQI9Wru^Z5FDy>UFxlT6`dw%s45pJt)XbB`^fx;?Gtg=m614@
zjY*WfEgrV5-??$)BgOc9C^B#VT`=K{XRjH1Hn9q7vL(G8SIgWcSp=~@6O&u;Ba=MT
z{dAe2RN~omqn(>{rBQRS<>T%F^!uz#$IWN&u~E`gLw7!{r1nXVvXIIRA5EVz=ep4}
zI8`2wRm-kE$>vQ5>=DF{fCL)Dc+O*Lc@Ep0Zs!X0_GZ(#kUQ~wy4J*_Q&yV{DGsT%
zUwfKb?`-w1E+OC+gSs@Bw;ZwS!T;7wZg`E&DE~Jy8+5`@%KR(9q^DVg!@PqH%-1Z4
z$TShLf}1;^r!|Riiyr1M-L)HkEK1I+gbHtRiV}z8j>*q4wc!hPh5o|KS>&V8Tv<!P
z3$Q7<wef@Q>O#Wh0J(1<BViCHKgxVCwD<!gwlH{Mu3SVF8EF5GUxA8QcfDcOCjr<B
z(S{^^I3I%3<iN*%E)>;M3cq{S{1Uwvt~B!1=6HJzxsa0#aYW&Zu_<*e?^~?~P;krT
zA^8&>xDb(0JXZKhvPaKm;TI+QN2LLzB$){8tGW0FW0iN(4=&5{9s!pfry5F?kH!+)
zM2WI;mU*8i<rWs9t&yZIId=X(I%=;!I_hY-P)ce4c1A6wmCGaJ>(N=3DJcIv85rtY
z!4}KV_q!bV5|Wy>ioKNmgVORIWGBL-FZOo|0C7(oK=rW=nk0yw_^e2D^~XnUgW?MH
zn6!H?we3V`o7ktzI)Vy%BBjP!$}ViM3<cAWP$nb$Xb?&ylza<Ft#{=;^hJ`?^_rd<
z)8hpV+zQ*lNXZe7rLfE6rTp+kEiO3pQdXE^s5z2lDj21b&fm5iCMm*_{_WwnwE({(
z^^Io%ufwd~WoyK?$?s9DK0s_pYF2rrY&N-WOJqK}oq@u0k}essrLv*3m(aL4OXxd)
zO<c7ijzeU4YM<rThSuYBDi7B|?@8oqPS`1_WkRB6DNKWpXdjeiPq(-k0-~%5p<N`r
zIbD5EMrO2lOiX6=vFB!Z3bME$dhpJGap9VvqI(`H%weAHF9i&Jzi|W1F4^Z{O+O!R
zV?ZX*5V8B5z4**XlH<qP>bdtTXKnp;z3fLO#~uS&%WmSxknKLC+Ly<~^syq#(Ayab
zINJeG`dTjS!e3;_yUO1v2;jKqleB1ag=MOR-0y-z*-FuLNFZVVM#iE3%VH=_F!=h_
z>(oIOlW{D`Y$~M2W4MaR<swjea?AQNufbf!p?7BT^gY=6mia#7@K8vt=K|NEI13`J
zn4C_*4fSjiMq!r8T$(iZ7~=c%O+KL*U>qJ$CS8%~W2iyhQ#Xb9Zr-N{+0dCStDlEM
zjr>7fQxT!=4J4VrXa&w}iVdp~3pCV_ep7CWzPtjJXrnFl;}7@_K!2>Wr_9ZVcq%Z<
zlwF+2;wHaJsQy}TjKlv)_mScwWBe~!(9Hi*4&Rn@BvnXjjkwqzi@OuLX*1m%cw74o
z<02u@;HAcX2|j@KVBL5dS;7!yC2J<X$~1%pzZxfKt&yF043lElzMOV@e+lFm41~F_
z3;Ru(mG+d|VC?zPC1jS#5CPLx6xymd!R=RwB&6=<5Wefpq6O;HcUGV5QXe?xCM$%J
z%F#3Cw=?VBw5NN!N|tlCo*Ch~a;eTu*++X<%5L7_ahnl-g=||hHsl<n?8m~}S`B-5
zwhl{X?{BeW663utrqA#A`5FYIpsisA=8|4VOplRd(>^l1Gz9hyUuwLP3`7riS6FdB
zXRVNnI5=!n9v=f<?~VN`_ThtX25R&u(bc>x4d|e_Hpj0Ue5OfGc;c_EB^{e@U4?gf
zK~lG;so~aNTbHUm&|VZ9Zdy$4zsr=Lf|>Vlk`7?N|LN7=^WimCqwod@WCwLoxp);n
zL!B2Ev%g11HA6UqBlqqqr?c(`cFN@wu@MY?{sv|hhn{TnoaNna{=DL97y6sDdG$m$
znL`S|e&xnvc+Kt`Y0-0LU_Aw*b~oL?>-%c4Uv-+zb+@-^Iq|L4>bMZ2<wh(`I^411
z8@NKEcB@QqT!J{UsFI|Spcf&L>35oTD892?L0AKsE{Ap(!PveDT@nO>qo8<2QE%|Y
zr_+PPR_<_|A*@_B{T=f8@}a`bnYMC8*=xFb*X^s>5qAeJW*)#K@{fvX?!K|#g(O+k
z!tMt=sRB!TuEbMkMsF+<HuZh@EIOdZid%PB8AkL7sl+K2g<{RqtnMiB5A<Mj(+wYi
z>z27+R+I}qcJc3~k}m7;ddp%Nd&fd`RR(%g>&Sh9uZR9cqd?9q;`sdymp=nlzWZn0
zfQIt$d2#PEYxL}=2T--l)RJ*WNYa<U<_C|3mSc)bw#LebxI~4L%LZ*z0=-*St02ga
zV<NreqXSB^x}R)9!uQ_$5a|_V{493JhbL9SwZH0>Bei?yOZVDDOB(k<gRjH9qynic
zDNSq9r=fxuy@RRBJJO<rOG<`nHRbV0&6nfrT-UqgtF<ZigUG#*;S(>kgX(IY4v;9U
zj<+y|KEF5lN|&*lX26H_&iM4eXScJBc!=t9?m^`KxyZ9*W&{%jAFU-N3)1;S7kE8K
z!_H-eX;vS~E;9!WJoc@F5Y!ge?1MRl8|M}KHuiOF$#`$K10dy9(k=do6RzMjvIBnL
z#Z<HX7{4vZDSGGGYSBel+T}iV`h<$Z+PKM85niP3xaIg0B3*;361}QG_mhnWbxSbX
z_k~UFWeb(o)ka1xBj9Vip+?!-b@QrcJ|QqS1qko1=C8e;Q{cFPmA@*p%4C40_=?XE
z={51(%&IjqJ-q<+!FsmRv>=a%3E^6r_dyF}D`62J>|(D2gthiC?kp($I#_svyqE<q
z#5|P_{ZCkmF9SYSL-AM3hq_~LzYa$6b$rOHdlH07t?{xTCJ9>mJ-=RRH!tJ#R`%Q5
zEJ79%P!sb+LL|R<Fh@;_mLc(F^9TuV{!(X<)G4Zc%h*lS%|?FUotICvdW)Gm-)9L9
z&og1Acr~PX{a}?&^AfvAVV@fEXsoPi-!*EFN8!=>Y@IP>+?R1^l8|!t6CZ1x)eKW>
z0ZAewT&}=l+BisPq8vFHc9_<+8ypVIkS@+@hkQcjN$c-#dLT((#RzU4TAr`k_faV4
zvs&FMGXLo&A<&b^v^kiQ#K354;P~M1d(mtCir!erW6E$YR{C!?eUl#e@X*atxel=^
z`~eo4tUe;CQ0n~Agtk+&W8SHGTYKs4_3L4%s+|le--GGpi?o#|UR!17q=CO)72h0R
z@I=2s^&nZFMF!KTKubGc+%D=Q&>_*bDNavP&UC5=H_UgF**!1<<)7bq&S~DJL-Ek$
z-SSM2ZQKZ;vm%7M{_3bmoecpV9M@6@_>kc)pBUQv{I0kcvo^ripui}h%Ynljz`;w<
z)k;JsrdgCsjg$Imh;hqX`8@;JbL5c*gY>9;y2e}A=lp;V(L=)yYq9SsfBxCj>a$<n
z`6wuwnS9;x*VdO2o_NiXoO^a_ub&Hi@z_4o_oMn?YeEc*<-Dg`YXT)8k>VVUxRI!1
zJ2Xi@^I~lo;qBGZlpMYEIJCIYdo4JvmqWBekCW6}JJUP~ZXC$kKgfj8G-_^#;d>s<
zwJ+N{#awt757&_1ft9f)UTBTYWe@(Q9;#;g^!_@+D3<%_GZc$DL!cito;NiaS4rM<
zJiSvT;rhG!FQT=lW)Vesr?%on-#4E@2h<)*`yTHEO{PD9v=Mj)hBh~rynkg`q%Gb|
z2%}dH+xGaEJtTbx$T+g57;KXalj0OfRIQ7Wd>>uDE4!{6mH+r(Er0-=Wa6Dt>se<c
zwGd0lc4K>9HEO%D&p8K&79ebf4s66pfap>vn3*&B7pygk2ULZs6D`L^4xo!9o-cfA
zT+ai$@Z6B4Q|?#7(gI9|y3Qm)f6%{c20Ml;T&#B)Vt@gzQ2u^a@13jn*#SXQNZV`m
z@3%mK0elO(E&AgAqZ|DQi#-I|2L*QU__b{Bi&6f2YbijBxe!0~Ma7d7LyXS6<WI+*
zP6QW~?!=n{=*2$*!&l(ySYkw!S^VE&m`tGf0d(X4pZjTYAa&LTXLV|>GP?a2wvk!H
zD61q!>B6u-g1g`-!+wQP*60A9i7d%I(CxqGs|xrjf0JWIZ3sB+_V=ijYL)kDm=QpK
zan=8q)F4rQ4=5<WBe4OJFAk7%q09&n)yi&SVuGoqKuFrdPBlMCFaodBz<?GHZYu$e
zAtw}^`li#(B87i%vknhr(5ivYBp)6rTP#Qv?M4q5Edf(>d~b?Cuk_^^E08!PfmT3q
zL($otZSK!Q``>S}lhkImy$z6ts#(&${1{47;?<6Vd1ojy*shz>*Y`4%KBMjb`<nLg
zs0Ja)LRN|DsM!bo-!aVl%YzHIeYQC#@KB~5m75s;K23ip>-T&WvI&r7HQn+;12plx
z6x=$ouXdLeCy(et0r!j%N<<4SV3e#{DPSm2GqnAYf9DocIN7_%QL{vK^4YuUF(88!
z5dYV~U;a<cD<C(YV-=SR0s6>xw2aywtpM>RgknWwSoaL-Eua|)ZN`wFn?IbN{`-vh
zW!{H^V@KmV+%AG*g=4tIYyS(k=tqy;zPg=7ueS~MK9lkvX8i&2CC%`k0u#oig>NYU
z`aTac0DA{++y6Oq7W4|7!@7SltMPfoM9>Yo2kxqX{kHjJWK#mBPOW~1T_Wr6gFf0<
zy9eb8F$^=y)0@-HTzI7H@rSdQOkewE>4Cp`A-d(O$X}J;U2yG3FM&)NMz1%{K9-RO
zI=62Bwjm&Nv0a!^D;aTIbj5eyKTh`-$h@!3f9WmjvB!A3;`FY?yp4XHW&6XdmWP5?
z{WRvYxQCnZ&#8q}zUpK{R~|*>J1nijK_)rzR!!^NXy4jSSEY5n<~DriY(fb6n62TP
zVVyp_P<_GKH15@>LJ^+DagC7t+27l;!889&O6i;YUs6ir;==JZZ+xytB^ilG9X_2y
z+WSqUv)_3W;T*!KmqdTwx5m5RRw#c+5T!vdVg+|Lfe{j<5O5SmGe~iA!3#+*6-!xP
z6`C>jkE}eq^Um_et)?<d*@3I99|@OzC0jlAVI?Ui>X%5lI)kBV?-b%wr{TBFU>iPz
z-Z#4|J_!n8@z7j075UbI9jqG-whKz#XCsE#g96s-ncv}D7_Yz~Me6-=5SyY-pTb{G
z+uPk!!)6$51YE>!g1#q~$15MaQ-!T<iwnUn=e0WeT!u)G;?4U`uI%kO`kV#oHpSb2
zXSlN&=6lfkia{8GQE{w8O-kC}E3+Y&&?w+iy9~t}>IiB0X4I@cRCvnUIbqW5Y*H9W
z&rh^DlQeI|{fqK)l)gn}KEDTh@J>s;Q=)KoVah}O_C%Y(TvedV3fv$&k7AV41Y83v
zd#~;zfy*w8&{Sn#W%9{Z1oy78Wp6V@#M1PZb%D%JbBr3%>qv`kf9SI4A>BPFe}**2
zD}3_!JLOYroJqUBXNBVDt~+nMDyT-B#*Ny~PH69`X6gx;e*GV;1sA;9ftW#0;G9qB
zPE=fl_jQ%lBfJ1FpS|B#sYSkPq}6Y50;6dnU<z^bDfH84!CbcNnMTKhK(djQCm$wv
z76mwcOX?(j&Y;TAbIb;mh&EY2-T4*^tQ^<%i8i+ebAqND-F0u?=HrsYCDikE;%dL9
zY$sdmaPe@W*1h0K=4X@;s#Z-IH2GkxCN%^tPvm^Y+vJr$k@IUAOuBC(v+`^__(#Rk
zDWj~-`)l!BZDyU$&k{>C_3;xZ1+pSE((l0AVQTtq&;2jlGQaTcSrfDTH|79Fx=$zl
zYDWy3-w#x&Rv0etX_ssm+5IergRcb<)z4N$ZNIlhVFU;@@@yJs*my|_rF=>Dqa$N{
zfTZyj5JlRXsAPeSAarpV+wRn$nkw5hhEa#BsO`LXsfCrHTyN{oEHx57dzHLz#B~og
z$8%2|e^ewao>oBLw)q_IC|;RmB-%`AdEtcgZo2c|w6Y_I&31Vxmyyhc*1x*rMDXs*
z;YBwna)b2~`gubu5U`!?rX4WRGNTgK=dCnj@z3yYCxOtD;O?7obktG<WtFB?Hqij7
ztHr1)D~y}iKAjwpGj{l&SW&`rAERBFm9lEf3_<OEuEVWU&JJL7oj7^iPT!y0=jOCV
z5jS^zj|Q$`K=3^VL8bZ52o9k;k@4}|x=8c=$)UHdTs^18W?aVY;q$_JV^7fY4`k9t
zN|;Q&6AZuVR^*tXs(w(bj(&x+Ln5J>6WU*FG1Aoo32me*u~1%Nf86@2lw05tm9&E&
zP;h@(RXzRfBgv*UpKtI)W{&ym;+oN`I@CMS4uk#laDa{*RG1N*w|K6h-ipC^34Tqe
zh;B-jusYu6!3!WSsJhrKufUDtA-+340EhsD2kB7da>aCh!RI}^yn?2bhWu0Bgj?Ef
zyaiOU9?UaqW5XQJZ_<pP#4sYq&TUU_JrdVzfKq2bp1wIl=2LfemD7${DO{SsEH~i0
zF-2V(K%rZ^yN%JU_KXtPTMl1j*Il70q7rpNvkT~2l8sYx!X-jcA9iPeoOedKYKTIo
zR$aX(p3hUpII|B1mwk6kNP|?o^~L1f(^3hb6y4J#5zqA4Picf*C)n9w`5}3-dhZre
zyU<(&J>L4`f&bXM)C>s^1q`N-J;XdwZC00|=^K0m@mTcxwwukC?kje?I13ODIPI~@
zf275U`Dy|QN(6C0;%>LSSP}*A(tQ!Vd*?q=DSu5&X~Zt}<2Dk^A{L4UWrC-bsBI@L
z)0>n6<@V?@oguSw4?NRWpODtWtPL{ktsm{|eQP?&BU3)H7_Fyo#<?OyxU{ICe{w0}
z`SaJ?xieiwnI0-pWMwE>UyO+_$?(vg%fCAsXbC6T5xF1Fp3Sy06O<$l<zCTZj$x2W
z1QzsO0*bczu_i}9Y*|$2;o2jZP^G%TBGU;@2GPcBp6LZMrRL#vc#(34**~DTC^24;
zsH<P)Z62WfUH3_5l=ff)%o9;@LEtf)+Cf!3{&_95T149Ign|t}Up<c0s+OD@Sb7&P
z4j%Z2ev(=cGF%hC82;(E$5Zp1>PKbK;x|JXRRGjR1l1Jw#1lNIL-nNI_x1Dp^}Vbj
zu@o-vx~wH#^O0Kc`!t5&39naSR!Q(JWA$#O85v^X*-e?)KLb11{UhNmx|EFj`gsSv
za$8IL<$=Maz)ifJn|%O%QQ&l(X?&#0EYVUW4L*BMlX&%1yopvzLd!m0byKFHWSO1k
zcv#FnK&jsm|DfU7*C(cob(cZ4G9tjh?m8)g<ij6xN?7EAe>K!CZgOv88Orf|CVsrD
z0n5mW2fAI@<X*NdI%#{C*MDf7x1b-$0c?K;2X_}tm9%TUEZm<sCk`GCN`(Bxj-UA}
z1TO^wJC~%)o%L66Ons)jxVjMsQ9{WAz!MdL4k_X9zILAbz~#XFe8;wQW4c;P+4Ny%
zhn#6ZVgjJ$Mg_W{&$iK$&?1_g32oNvS~$Rk+?zp>z9$I{^8bE=3(isUjnOSQQudy?
za9NvL(dLPw*Z$D))iuH0=|+##WUAuN7*d|E`Ww8c0M>^z%y}8h=jXReyW>RXIa#l^
zW*G^K`$%O&g-dD<;x4LpPYFa<tkqT%D>N007`)%Y^O@X;tExRtuZGbIw8QRKw=)>R
zyGmUg`N}jcsWv(X84)F;2do{~T;JBGkWnxZ?lM-jGMD;?gYx9W@HeqL!Q^*CDFOaS
zX~z%jB0l|)Qi!;*Cs86`w9&_9MiLtR7p{V#DA`T|_0E&Iz&9t58w?7aGQa(7fl&T=
zB8?qMDy{n0v^8n1-!oTzYiagraN{A2-sF0kj9;q}>pzq$eW6=U2Fp(bN=IzTV@Iuw
zo^c_VXybr)=Z`@A5qpy4fR;{UFr0&X9c$%3or?@s9Hz`CHFGQ01obJN2;>J976#fd
zQ39_+xxFV90g@GpFSuIxnD*&xe|gEKim!glZfpgf>*XdrQak5OGNi5LB|2p!d}QsH
zVNoPCH|Rg99A3ZA+L&FlA#5ejqeeeuR>>?D^hG(#l3!<$@6VFr+O<}Xvs}&ADlIT<
zS*~81Q{lq`U>TqE`UH~od0KJGrIUBaFOPem93X{~*Pzg-nhqWAG-Uwgvx&Pd4I43C
z&sB_B{@$a7F;F1ovR8+bzBA2vylNN~NhFrFa`e%)8y()gcat5!6|oTLIkm`pjjiox
zmkHQomBv*~;Mw7#&2b`<P`TF~L9U7GV?|zOHT;E~w>2a=mQZ>WS@o9*I`17NJXwX^
zw-s6ok2Vn8+DY@XU#}#kJq2QK`c(^Ea=JcnXpMkSq(O`Sh4}K}@1KE8q}U|7q3VH+
z_h{g)G>`cBw3qo(Y!{S&oyaOl;~s4449MM_XmhNIuHB}WfW?=lNygr{9k#jwqCDsi
zo*o~1E`7QTV^pAFwp;no=IPP@a+L7=opP+XsQrY8al==eWcbQv<`iUv2z**k1il^W
zh@lME7`Kc5Z6p3w3Lk*?tty1H5|6+?7MLW>R!f1^`>v=-**(IcqGjZ;m*$PzEFTS8
z05~Cx)G~qWuNUPjP^StZT79@eP_i>`I&@v0U~s2z=+%*jBQsod;3Jze`?QYd>)U(U
zY%#Dk{am=2W&|$CTOkM;QFUNv5Bn|XSX(UYOD=g}JEb!~sYQ;Th?3PZ%iq+|6LHX7
zuTLz|5b~bqd{r@2&5eYJK=tG^Dal2%1U=ro6(z=Sr}a#43kl)9zjL?B#pG}N4C;`r
z5rt6nOf!ufa1RDsbXjy=+?`|1_Y^64q%@`q1Hy^-1E(*xiuDiHrKeZVnvuT<QF?N+
zKX-KP?Ro;pd7$(QY<vpO_o+o6;oo9C@1OLD#d-~8Cbw3SwFIlbMu^Y%+)ppQb0!fc
z8#R}5W^RQiYLajgIKnb{C)(@3QV&E8YH}1mP5o%lOwMZ{=U&`Rr9erLEv#K#a(b^_
z1R0oEp2=-q)({@xaXcTCY^-0i0-P(Ffw(~#B*%f1N|ic;3rqORza&ioJL;g{VQ?7O
z=3AKSVK7=+Vt*rtpE&;(d@pjPz##Lwfdn;L55)Woiv$uE=mJlGZso6cOx`YqC3UYk
zv!B}$t(BhRdS(Fb{=-dUg$Wzbr(PE3|7|ESI&{$80(`)>SX`3|;3%)JGCU(lmI3hv
zNcKzvQIt8eOT6V(Z|Z#qjJF<8o^|5a?TY7w8S@*N*l(aYb|3i)RGEK)7u`6<w+mw`
z$4A}+@*pyRQkKf2bUySvgerfu)%4Af%wz8XQ8$5<Q*{rB*1Ada6zh&zcg!y<Haz-V
z=D&=r1-<qwlSSduj0R=1Hs?MYI|v{P|IQ@HB-Y-OprQQC=;Aa$T@A}nL5kV?27w}i
zxfgW&BscPyb(0;Y3p5-BX>x_?9gWD?&hfcqfV|IX%#1IPcO``8Zt;hJbH#1zP^lYv
z@RB1Tn%v6K()mfH50;&faRVZ3#i?Kskpx@(N4Cg-y`sl3sbg-HAQ2h|ChPYDkMao@
z^Z5FoGQGLKOvL<VzqArD!$MckE~ohZaS{aP!LGUg+*y#4#o~LcTnIT&D36;w+jnvb
z_Q2;tc{xvBTIj}7;Z$v(u~grfzrMg~j*AF%#u|BVvK1Y#@pg5M$pHV>cI0X4$H~zA
z05N%Tx}mbj(X#ReerIo3&%Mw)tO$G(6D9I!mvUyrdrD6S;CxW`F5(>{vvJWnBYM~o
zK2O_;{@Y#uwO|UcV8AeJ-2*%<@b!%4WFEoo;xV)j8*xEF+qg6v8(X!|5<vNraFV3D
zD$I)7khwRT2wQBuyucNNDXPq&SjZ>pMi|-T%Eqe@<TqI0v03K%pKY!Vl?U?o(;96*
z?R^EYxM<K}Hc-fWJo@rO?*SQtTr7H9^!_)JeVcdg3U69pUdZ(lxyd(^v*Z!`2cFy|
z8Fc6)ihBe#l?%_5>>KrX7YxcNoEvy~0yjwMf*4r8oI((NKhEnuPe>?#qaV=zs1Szu
zVAg&6%*~l#x7B4ooJ~DVxZ8+tOP*%Dk9KTvfgFhyEqdSd8VRnfRW$#nxP`;b=|Qy4
zW1gW~RhP+Lz<OB{!=eGK8E@M!wUx_eb&w`p+TOX@)Ef4QkgWOzBY}T9c)u>Na%~?e
zzh}P!kgrbBrbV_-ApL~e&xYEbpiT{lMUgBT)ktV4UA5z|!SP;ABxDJPM54Gi@E2J|
z>ub)Ku$tTG6g$In3IVVMRxR-t;rxD8b`?!!>>@t#p$ti1<>jyTpaYTvE(|h3RukDj
zc$kwSKp25f*^v_Xl<N)G_JV?RS6|m&ArF!CdQxfB;i@0&6Odb9{2|cbesM|wSt#gU
zidNC&NzY+NVw29T9(V|a=g>il=mUEr#=%{pjn<cpz)pbxJ-nCTUk>aOM*Ke>-15tn
z3t{wp!RW~(h|w{+aut9?Y;#JWkOscrCh|WB*SipaOiCucjQZn0nSSA=TtK)wKZnY;
zWGo7;*3{f(ODj>_$wGd?X9;vFofIy<M`}vZ62(n`Fv^nfq03iN*GXkuk;SS|0{1rJ
z9=Xf@r+B}MDc)ISelTFn(kwVN%7o^XsAC{k@+Ku!Z<G6khUnUj3@h_Z88Y3kzVb&6
zj8c9lpXM;t_^1#hc(`;5(L|GV*~*GNok=D5oEx&gkz-E?ni6WHws_-<l>FW|DwM-k
ztp6`PJ;1y;5$k@Psu>t>8rpEKo(#Bp=5z|~h<sH|5T(!e$W%)Ej3$$g0A9JFFD>ah
zrKg+LS_MGI2GnzwpobdrT~zQs6IM@|tTv1KmL;eTxb8c^^op3QP5G9|-vKI;z!8#}
z<>K7#H=60!M+!`aN8!aUk=<0$7O^t<7&KNGg#ifvAtrOLgXXumiYP<zH@L8)*9zpA
zf{Eq1Gxx~%?F;|Ht7fi;{f@|)cd3vGyG$u}8eb*mMc~@&u09sB8lFr#IodQYk#QXs
zrDuQo?aj%NE1j`ZhEC9h*1DK?@I%3Ode+AkP`ruSr%eo{h%Tr2!<LUH*$vN31niUf
zV~aE{hjCKY?%U^eeMIYPf4P-Z(~*$~maE%zWXA6vtALuU)WHm?Rau3Cy9v|ezPSA@
zg9VshxiJxPyXn#v)$6lu#*BT&KoI(&$5>2ix=4+T>rr!S^nvVic;K?*o&iu6<auTC
zR37ay<VR=TZmIY*8Rh;5ZoxpcBWt2M=k;|x_Cv+YJ3U3{#qaGiaJb9K(&9apI6=@Q
zx;1*97NTsY;hNuH^#+feyg76_5B_z7F5|i=uwr^MMr#P@=2Rz^>liPqW%Cll#7<i6
zvp?*pZF-UHzpOIFz^8zQiO#t8$J^uQ1~5&r6lvv#7%;!RGhQYwV6VqP`s{RLLDdNz
znIzb|Qfbmgnl!fObk|B410ky}_r<=tNMO@z$1>P4(jJrqf%M71F$zLJ9txJ35^?Fa
zhYQ=zm;l5A&XN~*Tgc|fEmMgvF}8;vG?JQVs3{D1^ZubMoj#N5x1IZCh<PX|Uw?JH
z7mtC(24T07N{!@IlfzhXZF{hK0*7nsOQ0DC#@<QeT<mqd6miI1y<Z6pd(%np@6qI1
zFa?3gshr_lW9wH3>#50n80<kn$3O>2<=GvZ2lCDE1tvhBvdOrxtfeJ=-tvMR)Lv|}
zFBI3C`!hC!CKaeCFFQb6V}Kf;9|4eNTf3{S9klO64K;aMd?TA|2negr_E)y50o!%F
zv-WJyT6MW!vzj|1*-pFbR)X~9ky={AT7Cf$OLDT)B)dlT8HbPYB)6=N{)YVqr>;f!
zHpiP6u`*9E-yW9#`?m*J%;Cq?=+J18>%B(7e1a4AKxxY3S0b<BUDZ)OQ{LYDTZ2di
z`m~Xxj92(JVS&M%@qgM}`9uiWQ@-SHralF0KS9N*-h9Bo%R@PkfE`9^8|NR~ow|f?
z-MhX-Ti1xO`7%aCv-AL$?{`72UOm$Yy*vZKXCqHlQXx@B9qx*0Is%k;pD9d6z7dm$
ztqkx>o)F>3J`AM`&tq{<<LSAE6fz&;?TLF(xegl1F^YezlauJFx<IeEeO2~inkfYR
zQ06zsK?mO^#BTG^5y7owN)ULw$hamcvo>84=as;beOs;A1=Z-GPc8l`IhxL(#vzC9
zM~AX1S11#Ct$N93+w*OKzK@O3Xg&VlOJD~suoiIWjK3uNiz)L1OnGePfkv8mK{UsP
z`(nii15lmQ-?*mW`*x=NmApxt54*KG!x)KBFsiAn69Xsji6liV4d-H`)oeJTFE}M3
zwXqw@K6oq$gVFL_oFEcpjolOxselibH)F0_wRfg=fk}BnX^H?&z_ltt#gWgpk6Lnh
zwFSAf=Gm?uzV+#Up{NSW+)+f3nCEND9?9gsEjOu_sQ@yyWig%AN1P1U9zW*`>J<D@
z_wv7A7*8d=N#hnusTdw3I@s0HlAwOxoGdve;xH)sUF<26j7|WW2@LPQZZHc&Gcm!C
zJ?>kZj*^!Yh6gI3hqugw8BVn!Y9iCCWRxk?fK(dby+zvPu^@?ptZ7yt`t~tq-N+VK
z{%ij$0t<=@w-2Ec2N<abvfF|oc;l>y-~%*2z`@6~M3?KI|9F_XCOue!LNAZ-zO5kG
z^mmZK##`NCnkj*mpBTWHXo~h4OjWj+EwJ+YKh{lwc)-My8t%^>Ny!^6-O*cPy%<I0
zq>xPv!#NKI<3`yI+4)<lSIOPf18_FtE7hieK}p&3)d_*`)6B5%*Q3+elERnc{PQ?4
zsvo=FX)Wb!<p>5fQVx|utG)mc!=;5pBpVx&YzXl|C3EhjS;gaPt3Th$uIdy&QdPbU
zBrQv$=a48MsIfR(Q3>QO$Okz1$!kH>O{|EktQ<D7T~PXv4(`?uieOQ1>{fC~`_s1(
zn}EUj7{PWL1_du5iON{bX}$8Q(a|oE?eM$c68&L=v5&8|=Y!U#nsf`JJSmeb_~6zX
zJBq;Z^OS;J2;{qQ#$oc^aAHC8U(9MmEV8h09}3I^%oZUB_-u$(VW%9$&L8xK&*|T!
z0ys2_-Y`jLf;+HFErCe)b^nps`BLDUs^{c{LvQE5#0WU*I;IVG`6gdTwj$K%$zeYL
zz48#K2@YcddhY|IT)jgsjdbxki(acCN)N5Z)oB>U2#p7!bOrt;1J`Ux_2C?1CSb=M
z);j}<z~#J#st8b2?Iuiu1Q(b5OnAEbv6~odX^Kh&ZJNgcrC;fb2jH%q8VZbv|GYie
zz}*_65z2$Xj6h~gpZFvz;$5}mR7cA8C)}^Yms!7pSTc(&4u2RF%dGO6-ZjLqOhraH
zGkRLW?XYO3eO!XiF}TRQmN=HzSlz@NHpQ8{zh*;94%EB+fR0Q=s`BO;OIwN0@S~GO
z?hNOkgepyrFTW}WzE?Fry_Fk!2Ju8U<y?{@;r7hekb$}VVraXGGySWyukUS7L~q^i
zcGlI#bz7hLtXjm8L%|6*qL5+Ws~l+qg}TPDH3}zuqlr*4vyT9+2VBPA5di@UI7sa>
z9dRygWLmpqK9qia<niUPN1xdFO}d(r6S*y9?kI*067m1-=0!O1*`m*Qx20F#5SVg5
zar`ynOGJXTFT}Aif<Fb`rFg#|r_iwy^#M+79B9#N@j1F&J3-Fw*^k!2v98eVcK%U(
z<da%phO^O1xT`hqmn~bugs|J_|FpBX%^FsT9d%6QoNtknHK0ntl?W>^PCfOkP%jT)
zQpt(l`C@C7OptBC|E4R1@iX3z_^7*a<~YTJgAjwq;(0h-Jhi+2cLT?<tbfb<+XsqE
z$s0Tp)*Vla!k!KS7ud4SHukqZb_Z{G6&bmJ8z2kli{=m2%X0r}0bZ<qppS!)5PXb<
zthv+IbDOYucpU)BeC`Ci2u8j{O;~>_T4#)bSibRPRRVZr#PC{iZ_y^@0|Y~~%!GeL
zcP10y!dqAOniN3$`x{f9|4^6!HS|AuBnlWZSIa9<OcEsEgb>Lokc7hE9J!<2KA@JD
zz*NdB>mquvKgB#Cz6lwz^)Rktko}bA23qSr?}ak(r)8kK_?htcrWi1V@h8z~#S~TK
zLN4tJ)^H)p#b0fn9Q@_~j1kzxgceC249@7uA{RCvtheri^ye5!WdMkStQ>%k4kSwc
zDRE>HF;`#E5R!1p2u4flCJAOi$?dK-1p{G?c@M55qE-G}t2#!Mpm0Pt=>!hqo;M#v
zi20ThQwac5a_A5k!Q8zAXV<^qmJ7_c1!VEu`g_WDfD?S;@CS5vCgwXd?BvS#=Wn0j
zV!p*p+oVO{o?T9EH*SF6f=H&OyQIG{UnYj84I|<I^jsX6o@;UDMP{A$YGc1i)8s?Y
zpE!1jf$R031Bd;6{MQ~&gvrS;qutEyEkL;pM!EUhf?vG9;}Q-|>JE-l1!9H%E((X3
z@n<;wwOI<ze@O5C;{<rYyRuj@G5~2%b^o^h$^Zr$Q=l-xz;DBbratwYgQ}2_F(?!x
zkI=H508OEv5N5UnLc0DQmjg3EEMA0;_rVQfB8PA>73Gac5KWJ#V2hb29-d<2f9Bl3
z&nEcqDQ7_k@G}7*rgQ3;0kOT`bo+0|6$s`LeeBDyVax%&@u%Iv!JJy^jHvKlvVn8O
zOuO@^{d2wV6EG_6p0w6?ouLQqMy<Z^3vZvXgodH=F?Hm^=+qkOGngF|(zxIu-mnHo
z`FSgG<}WIs!!Z{Q=P>!kHh(^Ybu6sq9(G`|cct_2vW4Hx_aIK_{-FZ*fq*nWw}y$t
zcXK>P_r6kB^apC{#+rw3R^slu-&9Gzn||hV8Y|(s|M)E2F>x=<4Z53z{50Lbg8)Mg
zYgsaILnNbKVM3H*E-pJd=yHNR^V$xxS}FO`SQ{q19Tg6r9V)zJ#(N$A9;BSHDMeD{
zv`*DM0|ep5`NI02hw|PGtKtqK-Y4;q2ZbR=Ed7A#ImgN-L!4fvDKwa^iQVa2CrE7K
zN%cf26P~qAo88W7Gjl3WcU{V^8G7Mab*YkRe=>0XBJq}E%GJY>uRrv@`fRdm6E!5}
z>50O&=F{1|bLoa0tiIko+<?)8xjW?4G-?EhgiojYu`^TcDRBwbJ!4Hf*Li=h3vd)@
z_Xfj@Uvi-(EhFXeD~))wBh#f!hpf-0b55OIFoFZX*uz^N;)HB{CcuPTMA?^JMmtQj
zN6<}na6CMxbvxVN3-db$=?_M*q)HY9jY!JlDvJ=iA31I42BD&*zVxy9{ZhJ|4Ho#8
z(7=nIWk}cEF9`IgwR3(ybK{5RhG)#5VMz(OtG*Nwi^+e&x#_Y@UvBo6r;jbgm=65f
zn%}h^*lWFAHo*;CO-sL9jn1zFUB#?y|KIonI<`4Gc#-jiemoBXr|EPtJn}TB-RSZ#
z!^EYG$|YYpZUo}7l3jGaVE0QUJsZhMF9`Bn4hZf}dnXDc`t8!84805RKJW=RVcYgG
z#Pm(_r_WQ`9@IlWuOjm0?|1lBJXL4l;i2qH0;@DE+suMKVErL-_h^vy)x{YyYpdA5
zjeVkvE8nHm{bYdAhg{jKZR&LdswbJXNT+mI^GCZk4~CtWa!iL(&R&HV!bL(J2kp;O
zlPQ<(OA*z+FxwDf*}6Y)2>gRPJ4^V-Ab%*7ZYZan84;e$&11!_lO$B64lVqq&yLYT
z?E6Q2ax;yl8|NAR5*2$~+Mv0*$H2Ang4g+_bU@2b$$fSo+6u{Pp+D~*lfN_T91MhR
z>sjz*#r`S@?(c3BS!|fX#d3MSlE+P_JH%T5P-^^PM2v^{_1+77Rm%a>-Mu>MgvCMW
zDL0?2^^Qh`%QA!dteugr&r1o|sY?e7s;(D52C9FKRk^P8uydX0lW`@sXJs^3$+xTM
zKZ1Am7QY^{nm7pQSZ{l$d6GDu%W6%`ymxp$ENG)Aj;>f7Dp&UHIkT8SSQ0GTPY{+s
zb2EMWlOL+=!g?b4mLe>1o;$w9KUDADwl}mKI~Ip8@7BYaYZ|=vWU*xJChFaW+4VCt
z*mYY6tU7<Z5KsgLo%k=}%3P`YNggg9tcNHB?Lc$_Ys%QKfqNHfB9wP~%D#l&=4;OA
z@O)TE??k~r#5K*(NeszFh4Bt$-H!$W5RlEYa~RZN3G%KWj;kOw4c_9Vr4q+o$EK1d
zp@7^4J|e5t#-FZ-2WmiVQO29LA%Kw)Lo=;*t$P53vbevd7hfb}W;^mh*;_BKCrJ7-
zwqW<^SB(%NXrVxL=!=;4c*sT(1mt`hw;!b#!7q6LpisLyt~+btM9lTybQ@((PO}yC
zY)c%r`B5(#{00KoE-%_z1Rd9%#Sv%4S@IDFnd~6COZG&G5u+6WNio=7qQ!cY=L5t~
z>Q5%>Z8N7^eRMZ}G?j~6pGia*wVgFcoUQn0NE{@P^P3n{TlVt1opY?V8+g7LTX1+>
zr^6t90;0Se%YEBHV+>kvWc~$Pc_iJDdgqA!B+Tx<!=0)7l_|iX+C5x!iMA;MQCiq4
zL7sUBFD5{co$Utf6>x@-YmbRI4r`fziR22MhGqYB6F>W8(&WAruTwrFQ@JadL~6a&
z3>Xw^#4J%MC)cjmLaBF)ujzS9nF>!`yRsl+{jPZ9vj#Tk8%?{?;v&~1`);sd_Gb+6
zVp>pG_Gs;6_PuNnb%Q%eS=Q5{=*_<Vk3kn>2qr!K6jQdjQo_Ewg!a`~taR(>oiJ5b
z**wE3Fiy!bH~|iBg^BQ?JbBe^dNDZZ9d#DfW%U%`>{10nk$ozROk}~(vz>2@db)l5
zgn=p|@M!gq;st+n^E`MJni<U3YI#}Vn1*}MsB`@F<hJL*Y?3dTAiOK-MU2~@m=k|Z
zVrO<_;G~FDQ1aKhkPg)?N5!&=dNnEEp7g@-GI1ffp2*$#{f&nc6_(vY3FmwJP149>
z-+FWdUiC?e8GNbN2`(NHBjJ4vEpR>Ogl@r*oqgFaeB*OkcvNO8aiPeETPCDV(ieu8
z$`2}Dm0na1pEwMOjd32|(ba#oeUf_S>ei5~vfNESYre*Q;<=TtUUs-(gBMtT+z?j1
znvyebx)IPBxt#+WJJDs3Xe(G$_n{9H@-TH&IWyx%Ce}rqTq5^ZrUv?A=c&`sHu;4-
zQn#UHBug>BI18jV=^n)|hfxLPMLp*Oal}LDO3$vvnoe<DzGy1fxp~1<?^<onEUJ}h
zIphM}X4YLGs=^)Qol0{4O)CMbUirp~Q6{ua*C%q<t)e*-W5G(HC%}VDe8Q<+l~!il
z4vkI2Mg}&EN$M=h7nz*1J5B%NQcS*&6HY!RFlV~dU2yrTuUheh-4kf`4OZuTLa&Ct
zRmltPeq+0f9d~c}`cENXZ9U(KrgXILOGZ_VUV`8}R<I*QLd<+;k^wCGuTVo_j2S40
zoZfEU>w321g-NL!L2CG3Y^U#S+g7qY)Z?Rd*sn3$ho`PctGF)lyv4b-NP_(spnlj~
zMvpZ>AK$5p7i=L-pFF(kreKXhN*RE;Tp26p=K$C$H5;YJf=fad>h6Ek?7mcegmA)$
zBG~$h&=}d0LF8Lsw^N^U?o=tWz00Z>x94<?W@t<rq&L;8=D*8r&sTg1#KBXK#U=nD
zo=jpN3ZMZkWYaS<GhB)D@M)OmVZ*Nsb@hHW4rXTYp2@@G?9+sA>l3Gjo{cX|+UstM
zBi5BtYnGW9aa?1iMoZ+a<#;c>E;1x=WAg7;nsRjyrx$)RR?>Gnv)OMaMZSC-eySn8
zLh$a#M*1jM_dagm&G-4GDg2rbaL#{pcq&W%XzRBAsB<E2DjQ6N_U(pdPD|LvU{R-!
zRpjq`#%Py4u2P$lE6=xM-xcb1v6+Nr&J?1G3_T;ZGnO7?jaC{dWqNvW$G%CubBnQi
z?AjAs%_6xZnzQMN-mziyf-*Ie9NGIil4~c<CuHwaMNV&V^DAxn*Wb%G{F!OcWGO{J
zyVX^3M=eH_-WVMj;P2-}dyx5mh`Q>yD7UUV<VYJJEhsA8ATTsY2m(V(gMf51bR!{3
zhjho#9YZ4`O2^RMFtnuP_i*3q{r(-`_YBWDXP>>+UTg0~nD!%Iotq}5%m`K)rExag
zQ1om+23p*Y5@<RB5R$6e>~*ZMw)+b+(ko|t^$jx@?iUqJrUj16x;|dC@8BzHZbH{r
z-U3+mJ5x!4+QRl27L2+SNW{54d-(KdRK`?J-?N@0Z>)<GF=og~_d4i4xQ$P#d*tzS
z!4-we`mC2r7`9!x^a6cf52JBGJf8X9A^WY|eCy_Oru0fQR{pJDw$w+*#W(SFiJ6Mk
z0V24+Yw{)QHHT{G(*E42BvC}GM&lGgj|9rL0&Ckl0&6>`FbiVLf@<>@LY9BN{>h3C
zd-nR&{19rDrD%x-6DeFS&bYn!PBT!Q|D}M>Zoa}8oh>yplW>-*l-hFScfTijnQoiM
zCrF+7Pbj@~U*gq+E91T9<97(RV7EqPKxPxy+a9zeo_-Ys50~eQpMR(+pkb>#pzBVC
z0y+r9Y1drFaE(=(O2$)+GemKBCW9!%spXG4qelzX>5|(l@$wX76Gn?D)Y)b9gf6Ni
zB}91+a&NU**XGT4N8+hTq>q4X`EjX!V^o6eWLLY@{P8bm)Z_ySzFm39n@&qp0RqAc
zU7%Iq##F;Caeo?Q_!&~XF;%1=e5n8k>v3+Zwd%tX1~T4MCzKLZ+_2dsL$k79kS`lr
zYi}zZnVpYh*UkvA*>{1gjjkY$6^>SU+26Y2Q@X@xj=bmo?B*NIo@{sBpus?zC^eRW
z`vuV9hgZ5{>HH2W(Vwl@gUNDU4w@|9I}bT-O_HC$mYo>^`uVG8fXhe}MCA{K^8BdN
z@ni%1k2?u^YI!VsSUy9+hQ)BlBnx|dF$UxHlDKy*G}0S0(+y-T;0r%3xi$(B2M7s^
z?py!*3AK6^hjHVJ@0A)|3(2nZRr+*L`YLJZ@fZK?$5`|_Oc+QIB+LdB4`_n|rKGg4
zxsrQ6SV&v3eaRQq*T=zxg_*z=*t!amQXz@CKa}*yU7mkV(a7U7RU1e;_s<+SrqkP8
z++Dls^AIh*1w%AllVK%-yG^x|Oz=^}_qR-Ot=Pg|-m%!$rSIju1Dp!H#uH|jaK#v<
zuJEt$tuht&e-|^D^(^xk`Qkym?TX(-Y7w4oc5c+KR&i051H6m(t9YJkAL+n_t*Iu@
z)yk!|fbu2h=5N;8{t%;=d2dbf#bh%HMYLNyoS)y8UKUEV6fu=r+|%QC;(7<inD}0W
zRsD7mWWk{>ziJEz2$c7VF9PT=2ywq)N3fD>gFc+Qc`|7J<i>LH(V*#LOSQx_ROxSR
zWlX~y_@vU)uiDh{)%j}_J{{cH;^z6A*wOp!^<ty0C!5{8ZpRo_SDda-ipDZIZ}|%y
zC(JKrnv9E=gxC#kep#5dfFl+!LA*~y8tj;eXY-8|*3!2I_VR#smC7OcCzH{D95@aH
z?Y8^IaB+M{H!(cii81SYHn0n_>S`R{5u#c&^Ly5UeGdJ^wf1Kt6idac?uke@w0mkS
zFzoBiZ$YGbfYrSS8anU3vy<~BjLrriuGq8II+e&X*X?lUjfkggYqC{)_B9(i#J_ic
zyX8Iog9pxS{ENbc5sM@Q5X{2Ws`-1gPS$9sph&Waq<-)qbiZsDksmQ%I9h2+=XgZ%
zJt*v?bJ`o5gf!<&?3PL%D#N^3;=6F4#%h@+#+9K;j~UoaQm@D9{@YE}ce{z}`7f!!
zq8+u}P6O!RNzhwE%q<!BQMJsRwsO<21dI1ZGNGbBgsySZau<DuM>1x6Sd0n=nv~cP
zF9b5>|CE%Xi&G2p53AIb1VNH>*1Ilcc${TeT`-NzFU6TAT1Yh!n@-8Erx?7KGfDy=
z7)*A?WFN=3v;|@2oO$!S^GXHk0_3Qx(;a+*Sz9y6n^DSH<wE^4EY!!NrWd0#!RSx@
zA*E^x@gDK=j&g(Frbd4ncpI{7z%&D#P^lt5ALj^C2vgUmSvQfR%@NeT&N;2!c}HeH
zRlO4nZbVom{l6qt7bY_(>ax=p7tJ{WmYy<lC+L`pR*fwf;cERTGs?kDjN03k)y5|`
z@?>k=Kf!iZ;qV3v`i@RlaD7WJ!4hRozx+KqF!%`YZ3^8-ihEPDmg3Uf+6bOxJ~U;<
z41f#*9b<=rIoorXUuf1w^kqdP^k9euAIc$e(~BYEQ|kTCuKj-GF{{1u=W@oAq$cCn
z1i=<#Mj9hKIFJ6659)%ZWr4`^C!EHDbI;E`A(K$$H^}zt@p-Wa0GQnzG^%bBQ&9Z;
z*r-!tV&pKDka(BTUd@}2b&JV+_Dub!*YOE9)5gPv+#le4O|?<!L6yE|E^_gZa1=#-
zN0B#0*9+Bg>)?I=R#SIqP|OL`>|3aK>IfMyu6M<by|<VcZrnQ@Tl@Kc_gyG@7PKAs
z3Uz!nD3LPvz|(m+WuU(^T`o1+>G#O^$vAB_T0M1?wj&`mqB{AC(6{jGqP7Rxn!M_#
ztbvH_YI1ZVXPqN%QVnUjIHg*Qr@;_c1GvWnoz|=3QjzPTACY=+aea1dXs7GgTCGKn
zX;!ovn-yMdtGcDPfe1&<p8hyLtj0<+gGS1V&BvmkL?D_c3W$Nk>_M{+ONsB{ARSQ1
zwC#zKBLfMST2vJ`8f!sdAcUbtXQ7OzKv`Y=j5TqqSjFc9C(>UYZ1XjG1g1BZ=HO&e
z3X$er$!;as>9G%g*x;mC<ecc=qHB?a&SWq5E`V)aG0jZr$>`p0%GB5+&AhF56w->o
zjrFxuEuN|<VJVpnrtVGBpn8FTWp91fQ2xO(h5nX?7%4t%oa0WoGe^z@59DDYKBMMz
zz%ID-dh_OTq!j-I7A3dJ>&yc*>v|)g_G&cBp%p*>$0dH|yPjzDv=a<oZP0Tdfy$06
zN{9?`+=^q;{M6)*hI+2Kr6KYte|W#%uIO1;X<=Z|1jboZ?PY*%!ZOflY6G-AHc}2%
z1!v;cx<Q;_8^p{7KTdhH^CUZ9#`E81DBzFOsKyyW8;p;zlk3c1przv<?);MKs3er<
zg+=TvNY1EN?%#BF7kh}>O{ss>7WC5`-x+0K+nblfU%>G0E~NvXjwC+4Hb1;b)1A~V
z4|#&xnM3p!ozMI>2tbEf3wTtyD2ox_$9j#XOiPSANrCjIE3o>7m{rCN`m9X<(zfp-
z?gy_eOMid#p9~*M#=6B7lB&;K;qN;#a5BM?PN*wOU;l^S6@PTaiF)5;LiBjo(7)4&
zw&fU{)xQVd$9(}_L;4>DZQh#ic2&;0%LPI{D|yCxPMH=p{C3?LKwO-MT7>guq^+WN
z6S`j(J^M*YzC(+}Vld4PV~kkPQ{s+nR2ieuYlz~;eQInF@dun@i2^C2sbt;zJ#{IF
z@Z(V#te|Q6ESn0+a^qZ0FwgPhg@P6F8wjD!>hhi99q-#8i%~bIUl3lSYlQzmt$jA{
zA)~p0?KNX<uAda^(qpqli{H8gNqDC}E9$);)`)(dIrB&)Oqq;~6)^+b>)-&lf2V*4
zz8}z==C5i3^BS;(M~P93<p|?GB-p()C}8np@(y9%B^MW`)-~oKYV^*U$5+YV0P&&=
zz#vTy+Hna!LFf&G86qzxPW#neD?@al<g{p55ooZ34x-Qi+qbMUuF4*+npjS4SKeQ0
z>U=tXIO@1wi;*%bt;tEG)@<K*`i>3-N4>qIZcIXR0r|XZVs9|{vV8MyS6qjE<f!JD
zo4EYc8D^9j2vFN$rgZ&%&yd3i5Brt`A6~01$X@niH7Y3mJqn9Lxhcn++%RXQY(R|6
zi7bTXHOh4q*~(o?XtF42nqkMvngtqzPeM!P%%Be9XeRRe{v!HH8A8v#t`q83%(%|u
z_CjU0{Ldzn&%q~bGN%hT$WJ>DVopB&jWk9jex>5n)}l^TS4hRN@i(y?i*5Kh4Gzl<
zg$ND~%f-x6K9vlp;h$UNtrv7IYme!`pAcHC-cOC36Xz@RaQ9GdPW+^$1xl!Af-#!)
zyq{%7{ay*gtS4wx)^lqJDHkV5$95O8KrKaF;qwQ{eQ-8(3xvFiMd@Y>7U_^^C)6`T
zhTnf5s-v=UX0K1x$u<TtEo}f7p9z%LHiNFk?Kbf<V|RE`=uDNFY-&o?b9OPs-qDnC
zm3=d!J@X*{7Q1GlPl8rPnLdOn)xmourrIHPE!#y>7v8_8bs=ArRL>RYd12p_W;R8t
zd7<4D;ZQ+L+V&B#j!7Vsbky17v(r~iuGvDwroS3W+S*#_PW|yQV+G>Xmm~EDC@ia?
zt%c}1KQ7dXHp)DDn99zmlBYs5{7IrEw<1|a2Q=L$s?I5!{>&8QeRx?fca~WBUb!!^
zvZ4KmPQ3uk@jMMhmKPc1!)@=D+2ZkqjFZt6wXZONhH~E9>Jq!>lvI$e)l{CjTxL-2
zqhLPheY>|mD=`aK@Rr4RZ_(lh!BP8KozJAO8&tGqLfG;p9Sx3s5QM%so@!^c^1j~_
zWPWnk>8ZN=GKW{2@-_-Zrd37?f28&7>GnT?;VwS3Mx)o=gYj4_!hglFlsoQz&%R_Y
zcMvz^cUZJe@~S8;bRnSPgsbO_rBy!PRVtJ_yoU1z`ty|3Go|4%YK`LyVU+$XZP6E{
zPmkwtakR~e@JGaS$@lWQ)cC(bX|p~#I$8AaEA!p7Qy1MRDIAaD-L8wb!bDBV<yzaN
z8DYVOUj6CmI(m!+e^>fakjmsW0TvHjwZW|rUilQSX$ZhN7F^cyX#S8o5_YXp0O|b$
z_zuQ!C%KS+Q2o7_uX6mvs8ev_`&oHPG9&tJbRSb$Vhv4GvM!9~-#$e;y*{Ok1RP0~
zan(4iBc_uH=s<8}*00k38ypvlN$$gs#vb-yE{Fk-SrA(_zqscQ8Rj{tvQefDW1@x9
zhhb~f{g>I3G15Y127*}`XrbMYvrSU0h~bY!93;I{+SFT;eyV^Oyj&Qo!_bWiK$hs<
z1(ufA8AW*~ypov%6L9*ou)k;O6&(21l=#{_x>aIn=48iqHERa}5U|UBX5W+(blRD`
zL5E~IOtbLN;H!vL{n<f>aHHdrP<-LLAdzNYJJc)UDOFzpVCQS6)2u$8c))pl5z=)e
zDY%eP^wS<dUWM-p9eK=a*1r8Qs%))D#;DC2(!c7;7=&9;niajP{Sc)(Gkp^<{UvYD
z&?v8%_bAb|wtD@E{KP#ZPN}0~#^2X$n1g!{SvlM;Rc(rg{PY3sHh6}B9lkPKA|g<?
z*UqsH^jeBz(ZZF>Z{!Mc29zHuUftoDh?|sqdtU+Zx^hH@qv{U7g9K!ZuSFy~D?<Oa
zV$#igLuaGy<A=9T25o+RwVS7et7^O}VJ%q~(}Zgf6mj>7hrHtZs`1g~aFgr<kF|ww
z$5yU*8UWx_g1;mMHVO6lvP0?nUc}=_q5W?K{Wr7x#L_k&nCUF!iBaoB8m{WK@j^G&
zK8so;>|;V2%;@uaqoRQ^)&-We+ei7fz~IET2(q^T9g>&*q8D0Bm}apZTw7y)sy6mp
zkoRPV@8jZc29<0cJpj1sxA{m7%+3Cp2l&s?&8OD@8^U9o*5Wqc3~{}{tnaZ!&VJh8
zAIoE%yIo<6XER9hAfhO{nT-$vhntAinCv-gmxrC+oNPT(hem{r59l!heTR8m-ud<<
ztjJ8)J5kQH!UB6Vs_4G%wIHM*<5eD05{*UUfR_(zEO{mtXy$XDR!sU*O3;RHLqjbm
z0^<*Ry3D!LN`j3R9;g7TI)9|Blv3?-yC&|LDU<Dj?H;ltgu6y-qb&|irr#s{=X@4U
zmbcrxU9+hG0srH{C`Rt?a>tWtXKjRXk9v`u_E}LhhHzobg?q=hu97gbhqbofQE;Z0
zsVm*O$kMNTd4q|+mY?oVRy}R1Sy#%)9*`z2(k~FNVDH@4WVh&<TykF8VPrax=LBr|
z57wC9B~E<i=GMC2Ls-rck`nkCg_P~hI?kB`*8Z@eeiVSk)ZV2@rrz~(;IikTiKu0c
zmK@cH1AwD;r9I&v)&nBPmS`hiKEQiGMzndp{qw2wYWuwJLg6blxH%3NWMLg@wjIn{
zqmp*yx<6kv?x(lD_0jEz5XsEk%&S9p7u0G`SxGJP<3~Q=iqDlF{=E0Ny-D*ZpE($0
zEyaV<%b%t?%d}Lx*)dO!WuEGJ{`RDdd8(Ke1%EN|->Fo-ivY^Bp~HrF=&)?@C_3}c
z<6>!y#$KDi1c<AS(}Npf3eHR-R{gd>QLmIK&8+Oao*2DwNR4%ewOkAfIj{9}a<fMR
zgP);NEs8H2?a3V6a@Qk{R$PK&awdL!SLBCcN)T=Ut&~N{g0AIg#mhByn!T}I$^-DT
zMk0mO+C;Rzo)<?JlWw9eL`WtU?XSmwwE&ulAJ`3)04-Eb;Uno-)oxp$6)EBGtp)i!
z*D)jb$4E(#zdnYS8b!a;Z%WY^V0?j8Qk4l^NRcUge~1$wH(!BFDqdag-StaWbhus3
z0w&JG!=usLsAveneFPA?m>*5Y*ZAx^V82}P!dB^Zq23xb@Ez{^wE{Omg!yL@LSz(D
z_=ABgcr{YTT)=Ma1H^p?nz&nF{)r3im}Yt+%^%wj*AMIFi?CIp6zEa7PEFqXyk2b*
zrTz3YMmv84H(%zjEQX{5i=43-X^VnwbGdclyHi=!x@a+YJ*K?2)6IrT7c~Yl+_P5G
z`53!%mGO0U+A?#(U=E=`V+Ngp47l6YZ&Y)Cw*1DRiv7StQId^EAw*D0n8xC<cG&=<
zq5dwm0(z-wZ=*U70IzPn&cAQrH-3>enKG!aQ(*mJ4A)fhu)ie=)|VmL%>mj`7Q32Z
z{9)Wk$vYExGqS2yYT`3~)X^gPv$zQl=DaN_YSJT-13p`Gtxk${0twuoQ0*q<h6=m5
ziS{3+{M^fTAALEBhZX4QM!33tDAMeGnb6qa89m$xdV4tEc)F*QA$lwINQNMfv(<?~
z$zP%)o%|XL9VYSot1io5Oh4IrcN2*Q4;z7jy(Sq)_aSmzj2h&xwoqWihk1k5V*ELn
z$8iMzBA1Hm{9u%pKm?PoOutMdUGL9|j(i)s2^!L05EE*$$#3AIS7W6TvV`sfj>6y@
z1U%M-fNA{wB-%U2-<wZWFXA;+!B8`|+xZ?CH0CMckNnyj9^YVgR_&&=EJ7-rX|Va%
zR|;h2#7x;f8U?r*<PG8aEpFm8qF(n_xj3iy0fu<xBr;hjR!0xx!=xs|51J19vi8EE
zB?HojizLAaRj?JKwn|5=uV_Scemtj>29}3($d=FXg{QNve4*OTV)8f8_exCI@{iAE
z>Owl><<joINwZ(E%@L=5mUrEsRZ)x*@+Ul)wTRsW`hMujgAcF{jBbip;~)ytv_yjI
zXH-IAb1!@u#WY&f(1IaBpW~m=V|}`+-^n54G#-inyaAV|>^96IT;Cq*50r~zRFbW<
zv^3BXXDJYaZp_aKBI{*jJT6vox|E{)@>zAIzOtoe#WA)Oyw$3fLR}|yV}NQDyO#5-
zaY+ZL+6;cR8mTq{9XGqq1R~L46v!kI2Nn!%f*z0yYno{62UZ;#F|glPU>in-oN@Fd
zzF?M2+7WOX4oGT!oJB}m0&D?KTw*pkpl>EunyQ#lKZ*J7LKHfzL|v}z(H$1MOS!T`
zfu+xurOi2ig^7B72$ESmE%Argcbyl-k><X8`U3SUCj=jMbABBmCz(vSDM>T~yuKaF
z@g?8G$h;8)5Q(-R@{W?5It-kC?#IO-l=4!~LlJ?Ci*mM}3Qglbhj#PBcJ38e+6a5G
zL5)5DC?Ni!N`mOJCQGuseSjgYULU}AsrDRqnzsS}VfA|qi=gCU;$BIgBTeWTA#0)8
zg1B1ax36(e_{rGP1*z|$K<Ef}0^GQZz>JVJpS~<bM34HEg})IY3^b8kzbjj=-&(<x
zJ~Z7u6KITeFw*xcs1`|-8_Qd!oG!v(p77j41>29GZ@00bQs<{R?VcY8b3Jt?CeEWn
zQ$LlpVzrg&SN1jvNljmTZ50T}{C+$>DH@465mew!JKhj)i&*N}#R|nNxYuq-p}u-~
zTDpyF83gs*$`8vTM0a;mx>r202BrnZO=a|c6Bm|17Chji3oV}mVD%WYq<s2b+SNz)
zei={m?S0VJ7h6gv8n6l;eM`!5MYXe@09A;hQrht0#y-3vy1l$O%D$MKo&!*!0gwpE
zQU6K%n+V;<0+Pf>Sb-E+_0OU7xY)cg){Ok{z(}VC9jI@<>E%FAM^w~pp7-KUoSh<v
z{ngiZT$eT_fU>dyE3zszSVj^aNF1(=(E|53mR}3t?;Tp1r)g6yz8L#8=gBb9*3tbu
z^EDJ|)+`5O(#xva3bdQ+$#a$!@QTVe)wfr_3}x3uP!tGtI5;2fSR@O2a|yolAfhq~
zlZIZ7WfG3$wJN1V><?!WQsKdeRqB<wZv@8Amb;F`pamgL@?}K1di4O=R2$60qnBqA
zbhm_rnW5`$AY@?<73HQh_C}cAVRLFs0{@#B!xgROw*oF!ru_-AwuKk3;AsKKYN>4(
zC;(30LeReeWLysFg#z!G(G&tkrw4Hj&S%%Ral6P6@_8Lm%jC$K!ZPJ}0F&)^G9OP~
zW`uLNOoFs)uW2W=+)r0IdCRn($`%X&vGvatN}P)JF%_-HJHG}^*62v17odT;nKtDV
z>Du!@RxZ?84Gt?<=yX6zkRjq-Ll&vkJFeW)^Mc&(sU87u#&{7#Q8sBCwQ7FoJuXSz
zu9-WDHfhwXvQK~_Gu7mcJ6@@$ocN<BmX;oP>GD8@)Qm+-az;%!ePb{^dDaL+s8Qv2
zTw$Tj#-52mt9d4DX@_WURB|lQ(p(M?Na)|(M1EueOsW4nv7tLa8vG9?76<YJN5XIN
z{)?G}VQ7MCti5|NBWujVuhbVXX}sWkF`9Xc$a>~}8cTJx>(s)wi1%i}O>#$Q{9@uu
z6fkPCD%vhwMQZ-1C{4z$Liua2A6$z=Bj$@tz`!pYg(tg`loKV3HEhHu)Hz{{>>7%A
z#IMIKQB=YWk-&u6b_)n&yys6zgtJ_Hac(MlKS7ovFkDAnCTD!7#J|`uJpjTmUs4W!
zg!FMLzC84jhk_Y(fW%eP-h@Z+A*ji5gZchFROSIo8AhK+IquN6T0iHiiup)=-~O#^
z@wBJ<wmXgvM-*=_U2Rsrjrc+Ag|SGLa!YwDm^*p>7Y76iQHA(BYjVA>_6Ucf@ts<g
zvBDl0+vsY-k}Apy@idDdvQw;2EC`B77;n~epOO%503`;7gxMl-0+*3E^+GS8hDG1r
zoM{Pk_3YoCR?hRqv&ax}_(FY0-GUmjL}IIp1t;Rfm2LTze<)=r@e5u&&^iUfjfsi)
z%^xy<jjNUu7OXG}u^bbLuC>(73FtZ_zp&iW6ntd|J^Au#^nN@OPU5S1Ge5`4uBC=;
zu%W^StOe+6@_nB9UB;sX&gMX7+wb4W@l_t2X`fI!DNygH>!Fw8S-1<2H8Dczn{V>J
zy8nkM7)k(4!9X?3P{Ru(gbaz%*32E^)GV&QX31x-akoejN9$%Ggt!CGNH7kt%82oE
zmoshgEWa;$%!1!lSZ<;3`C*#f`?1Q|*+3bg@fmFUfOtXb>l1uGa!^WuRG33!guD|t
zJ}frMYopW<jy@qpwU&CYGw!eYt;P&v01%w?-AA=2p=J6nH#HDCy>D5k*^$1)YN)se
ziz~*Y&H|w=zV84Xgpx43fZ+Zabf!21PeFv6INgLObYWGt(2Che{o9k`1mz&6G4c|l
zRDQ>(3x-_|p9=x@0=-()P<)#7`1hk<D<QgG6cp&owcA%+Mp*=<r^;<c_B7WMMsmew
zstTnTTl&%~s60F-5<A=saKl?rTXK!%U|JK@6X#HTwpXKO<^7n`6`zVWFiw+aAVc8S
zTn$gZGJCg*LK-(N?M2bEZ_`QNC>G$hHvFIMNwYpYls1{6Q5j+BTB&mUc7hs7H^Liv
zsn>Y+KGf2=HTpL;oExCE)RpK32vDPGc-B_?nr%kLh{usWV%(s&siye+yU-9YAtOMy
zk%ui!*&l%t(NJWiy|YI2iqxHStZn}>8n4k>fcte-Xfog~!+I)Lhmf|)By<TT@@=q=
zDT);KO2trIY_p<4oS#djI9#_2%T83)y*OM6A!=w<Ol@#L=$&^H(U3Q{+Z+Pq$!JK<
zDSiWF2v=D8OJb@D)}%?l+xg;?CZ9g5t;F&<^ET<hcF6w1wO(FQE`E*1k>^1r9z#Gt
z+fr>t=5a_TMrms;8EOJ-{y<lV^USyw3=%qeHPO)p8oXXP^E=V5@my?_P&REy1sS}*
zV#ls^go}G)heP$cxrQ0B$mJ4BWqp}Tqvr#&yEn_Q%);~h|I|2r8n6N3gF^jmrE9hA
zMO5HwSy6lPo<6U41{t34$117IrTn>kfV?c(#n=~E#7{NbQtRf+IosY%nJDTV7sj@7
zb5%GiK07K5i2pUVfVX}|QOev<ZQQ?~mmk5)^o#eOf;vF6h!LxZWhxTmejuI?`{y*=
zn@_HV(g%EHjEt1P&Fn?`sC)hz5!3r>Lpd>2x;+7x7^5g6xOfxb1ZHCeRk$eZAVxI+
zLe{+ZLcNGt-NkaG$|mZlBRrR8bMi4U@{e{QorkYMlaEfDL1sj$40N3RZ*Ke%3dGr`
z#BbRm?_&WOQs~a`au$#=NhR7?#@gw(<x3WA?u~ukU@UbEjZ-~wTD1J;H?ux;Gv}-X
z)0ec=kWy!+&yAqSU3f~((5-}^d;Rrb-r%D((?emkOvUH8!7NgLBm4PdAhQ2?%+$->
zssTLGpg42ZE*D^C1So-*a6g_Hxb)+*89g8T&D=_$Vwn*n&wjmlPzD5udqm=KdUt_>
zQhO?q;XdnKj=<W<mh$-DYh&Q<+87wFPiw(LR;In1nwFEqg+WuvI;WSr>+(Z}J?J`H
zm31FC>ge2u@J!1Dd%vG!P@$%i{xW1OzV~VU?yuLuVs-z%miyOtU+4P0?PtNqaO2O)
z)#1E47_g!6&HXk}Cx>bGQTowGV+vrK&yrk(JM-d6fXy1|L6H_*rprILTN6lery&@`
zWytbAt*2N*><{f^nO<A|O{IorZQUZrvnfmOa>QcD`#dOXx1Pi`ev9q6VzwxF+GD+R
zwy)OC<l|fap-q}9`qz8KSg6-~=RZ;Z=CrT<IW&RYk|@vgC^!%}ci+U-l43|Iuu*R>
zVLaaF2mSy+3Zg*q^A#&alnP=7@P()k>nu3XnU~$>)6#J{{pPo|nQyfkw0n#=m~}OO
z^*|rGL4(x`#Feg~z=9hHsu<vp_^i$fMFF@}qTc@-mNk64olb_>dklYsSbkdk-x__>
z16Fd0tx|{<|L%ohKkFfWOmdG9)o#S?7)83UJ=rMi?RFbc21t$^mcQq5(?=?w(l{xX
zqlv^ZI6MSA>PpFLg8qbiQ*XAoOmdlccMG+5FL=yQ&$W3xnet@glRZu;C9v>FSN=Si
zy$tk#S;(K=Hpu7oYUcZh7;g>O@mVYb)P=h6O32%!UqGHsE#U!oI=WC#=J2AeHJ|u|
zOTsFy!&j08AeZ4LS33ly%IK?s*#{Oo6*qCL-H|mwFQZ29ZS43`6B)KSPbb_*r?mht
z<fHhjiUjVYq!b2pZGMw;Fw6JkH$cB({mdWQvq@oah{;_={)!KaXOgkfWLaN8^D`>Z
zYE7JH?x*Cnlk|f~$pd}&WeDL``pTsj;bZ!On1~FZVm@3>hy930l5$^z8p3=g?|~Yx
zV;n1)2-RRomGlmY$j6}r$8Kas*O(9RM66U7DBU46wPt6nyl~PNvWcNL7ssF06;ob&
zy4vOeGecE6Ka>{gu34fD#3O786?BGU5Th2?MQdKZSR@($(qH+PeS`iow?=Bkx<+e{
zQL>({)>)A>qV{i#krf-BUzSS^LZe$7J^&YO4bU?m3+FZ)FjT2rve}kA9gwHU>n{ZQ
zMEuGDBO6Qw_v&gbN=5U4>M`Psrbm(6%DLU?OPOTf1I_)V6=E^@fo+7t2^ZpcQ$ezM
z7OsYns{!PV^NIj1rB<UhMSCXffQ0sB&XY8|0$hn|>AYsj`~(Gz;+>G$x-U84O#bwE
zFiCMkF$2oF;;XUI|H)^-AZo+IexI%B=s*#1x@Q-E^We`!saRU5nn612-`Mrq57_<B
z#GKttLhs_i`(LZr%y-*Q$n$q%8V``lntbg8<~x{oycM+^DWb-Bv~WmQ_(2aGd|&m)
z`4I1k!#y$o$R$7(HMD#pxH*?nzdF_##H3lTNt^vZ5+9$ROM_KUjQML{fziq}gK<I$
zuC7`^pH}tJ6>w**E)1S5$lfrq-cw9Nof5!Rw}0RhgLonU{#$=e51yk#gXLc}wAtbP
zC;5Zo01~iBHrk&OOBRs&iqMPJjw`EvI1DM59OJ_M@Ffed91hpu08N6?u}@NJI0_%9
zU;dCxU{94Sh@k$^?yz&<lf1$g{c1;0{3u@`pA@&qe9Oj9%Wk37^eb`ZK*nH(Dt)b{
z7-?><JK>WzSj5k(YiP{p`dV=%(V8e==2HPvanH55P~*KRMe%(~4l7;sVqa0GCxs6E
zxICVJJbr81TVMB>BCCWKl_1GQHGmcMAq)_Q7!rsT*c9yH^0X8v<DQBSdST&FqFgS%
zEAw`nJEA5Px_H5Vj`#Bx-%<z5PjiTZ$+<p!fXC|Ba@&m##}B{-K;oH$0O%uFDHZ)K
zeKFtSl;Q!il`mroXU#%a4;1#ScmgzS<4Gem_g$Gb^?BKj%>$fz3XHc_uy*Y)#}`{0
z&6pe0O76dpk~)3Qk~reIJ`BhKOeUwAMS<8TU~?sL(|vuTb9SV~3E6mJDOx}hT2uy7
zTtn4boQwqJpPl%j3nvU{0R<rRD}rG_yV6s<9O?=l!r33Ikt#N?M0ey*X(ML-Wq7#K
z9jUQpw`L%^<d`IuW*@EolH~6t^sM}DhuK-1nBLyiA`~Uv81GXdJDmDQqZFwHC06qL
zS6(c2o>V&W9zWp^5M@#I&Ine?ze!Kyq}JidjgI|bPe5ihQV!y4*3~obALzuV;@Dv(
zR^0>HHM;MHB_F-iYqd`R2;!dm;Lav9Qfv|m%&96{l7IP+L7FZ5-=G6SXN%E~qF#6;
z%A&=pQXLPJTpqP}90rIK9akNbb7Msb2%QnQcHJpFIz7ocS=tK#mW0u8o$@)d&`>kb
z9}ioX`kWt3R#lU6*<*xuzryy6GmM&^P%!6JnZ6>cptBJmF(RI2*KUl(hAmPgaTq@p
zHBP&In9S)eR66eywIjD`EQZZT{Nty8dYqVmOU*m;<t<p?dQeybk2qC>{`%lSo|IKR
zcxYPXV(j44=+xK=AgwRbDdg|hZA{2lqOu~hyZ$JBk?lgHD<rx;(7WLhT0lvnyho3`
z|IMB8T`rm6$HgZYuSW;i+gGw$n$8EuH~r%3GRq{J4}}^g?4{?Lu^C;5B!eMCPZog*
zSvOfg@j`X5Rgw&yBF#xGO%0VMDb&B9Nk?t}<QRjtK=#v!?#Ii_X?nk%i<WN0kB79w
zBLq#Gw{a<q@H5u-j}Okl{*VU@twP#=Q<Knpcd5@u)bGBiyBbt(*sqguru!7|qk|4_
z$J;ZEhm4d!=bubKBH#o`*q>ZVMFmExd*udWqO75e5ZO=3t>^L=_pn90<Tc@M2jjU-
zx309VxN{i^D1@VL1*x#@4K~4k_uKF9mFDX*XirBJ6jwLavy9meeH7I|qv423_{hVa
z`(-Oc0EyG<Et!(#Jx9Kt`{rvpiq}^Blu&^zW-ATi)5Cc520_PDK$!Z>4@4$ABjAbR
zZ%Sp^1(!X(?Q$Zw-vU!9`9Xl5*+#Ch0?0RU{DnB*%nyu~l5LR4R>w?Z^}1|o_rhaV
zx;fAqC;%Yu2ET&tJzKNgUUEX)14u7ugMiHijau|xTVWBp96(QT{NxI&_mQCU2Ixt#
z>T4g<?j&qNO8z<?aq8V>A@ARz$;cAf-dDR>fTU~~5>Hs`bcBr(wC1^;^-A2%oTEX)
zTAJz#Vr7ViuhexIV97Nv;dkSI70teb;(BA-qU7AQZ5r&v(j4Q9t9r2|eQm>?ISfb_
zsSRg<h<50Xi5$rspvE#sv*8TzSU~^uP+Sy<@#5cljsJs=p?691Yex1#f=|FPmB>!e
zrF@7bfdM<7E9Zfa+D*Q5`J5?2kop{s0OpE0Q1Ckjj47r$6(~Xt>J@9`5E9T8Ohv4|
zCGL($y@zcJtzB%fgbLf7NVHjTo5W@;A7$HA_PJzzMpA6heHBv>%yQhy+%dr%3Wgaj
zj{D_oE>Df}Xu^&IM#V4f$ByxepuZ>fE&(b;pati|ig0hHtI*1iWk$_A^NtA7CH<EO
zQGkNWs7fOURQjPb1ulU<9-jqzNbjYknveRA>JnjP8ZDIVs>zRy0G#s?R-xiL6nqI5
z`X+Xr+`p|cQsLmE*5vhIyrrGp!15Vdl-F6?K3Vf-h(6z0gbi5_TCNc&f;Iu9eDOdn
z%S0HN#PL4LsQKiGe5=Xf1lL8@oshf><_P)(gwhWIkZJef%sxQbDxe1qgviELNr#;6
z&ttjI0ol@&0Ct5*!K{kCpoJne!Ef^o8tMh03y$&Kdi_=}a=2!~Z@H0%fYvSk;ntYr
zPdE33<F*yik~_4_a8Q^htwF2%X$Q?3nTl>j%ufF)siN=DMl=UQRjwG2az{%((8qm;
z0b6_`jv}NYUuW&-Z5TEix~D#o$5Id-r@{NKj*%WSGcZ8{Q1eL^DR=u@!Y>3b99G-)
zDY$bpidw2;pDJE6<u%ybj7^_Rc5!?!rZ1g!5gR)>A46W_?vW4=rIk`Ke2&*K-H+(!
zF-c|bQl<I1;*AY0nRI9v|MCF&E6*~i0a5c|1FVZP`6x`3=n1z#`}8-b1wY=T>a|7m
zcA)3qlDAS){TH@0!OpUj2an7+;QqD@p(O&am={WgexdAHAjb^b0sLlQ7NU)VsJ+ry
zJ+p$1plbnBQThXjPS$5~m9c;Th+rIDmuBgjFX#Iq`~7*{*a8PUdTjGTm4YnWqIYbW
zG8wki@FZ<fmFZ*2a!Si*K*KrlE@R?6wLMm%wGr{Biyq-Al<646WYP-MrFjTIfTS)}
z9==?fF(kWyC^F|Wq)^$1Qju+B>aTpje|ECbbq__mJETqhr7X~?FE0pVZ9r2D7FeA#
zuQ%-gp`(2?Qch8C-McL=M?8|5Up)P)sm|urY`p+-c-t;lvW`kLX|>VuFctl6gzhaa
zVb-r4Z}XL8lGM75Hj^hnEMoAm4Zm4BIg95#!bMg}zd%U;7fB`Vkd#Enmv1@`;E#d=
zDLZtku~DGrV^u<mT=0vxafyOU=DgM**nz{B0d-lh#2Iq~k>f`H;EBKx6J;UvT)+o#
z5STJ)aPCsj46^l8tGVWVOnv!rPbpTKXhi@J`F`5599yf&Z6i7Mw?eDPDP#i8wpU_}
z*&tJ~Pqqe5yf~mHkISASPdNc8X?S~9rE9l_VrfUUawKL&Hx+--M62=@DH{bV);&TT
z^9$L$Iz*k_Kp8=91)5l9dj&Qurr@pAhEm3=sYXR$(|fWe`!A<~kw=Fj7z)#d8GtVT
zJJf7^1g0w^>x816!x6*r!&7|tg#}lrLp_#7N=~tOdnSjjoB*RMs3rJIJ{j%}+17Zj
zy611T1Mr#UA(~L&-bI!uxb;ZmT`&aiikykOOGh*#x|Kmh2(Cob%JXI<S62obD;n(_
z>id$R0B9FOc~`zHhH;GGQFnvrHp-{_?wiI1WPBY+%g^c|2!e<Jf~4>J!~@c*V-96m
zOwcd3X403~cgiG4AQb=l64&p;R_azQ`rDJesigf*kUd7Ir1#T?#Gz<HMrShdQaUj)
zx&tC0$ANme*ZfDpKe$`Pdt+-!Pt}14D@zi)U8>#=NVpL5t?mk(#$C(bn#IF`D{;uc
z_qwYz`LHsSg8F=)?3{X}DoQ(#27dS(U~?WVwujKQqv`UjW9hL9Qx2lxi|-~m7a2b^
zjN!$Qa$3UWEYHhn;}bdyN6^68jLc)H>1}mKjg?K#3gw>4mx+btwQTZ^myel?`n>{}
zi1!@EyJD1wz~XRGjh3TTHoBe>c7;<$X2lK<ZDSV@Ouz3AeGscU|7F?Ohw#gK-K%*Z
z<%pdb?T$o55R3;5nES=AZ*Vb`mW*{wZy&I*UR#cxN>Y`7$MxFMY13)4J31Sa1s`2M
zO(TRXvOK#1E<KY)3lWJbLaRZYclz{DdX}*`#9suCMtR3h5MaJZkOk<k`yaoek4vN-
zc(nF01K7lXcqnrKMIFMRe}(x>p5YFf$StkD`m2Iz*8n~-E{^`JzYXB&mgQ?!S7zvj
zP7&+iL#!_(VvS4Mw|Qy?d7@pR<f^&2d3QG(W9sb7zo2Kb2>8UB*Kyct4&Z4to7>iG
zu~1&}wQE{;Jjy(<ayWKDraph4MjHbpuJkcjuM>=wf!OQF0Wj)tbQm-QJ19<>hd&z|
zRp3=%S#y=4)t?!vtz<uVoqUq5NGL6f8p5o9BHt$C{qjhnFDprAud<}QJ}l>E3vO|I
zw$WBv^pzyJ0SAc_Zs{2LcjFj#0*|j;T9nGk14Jy+9XCI8!kM6E0J?Bq-V~Rhw7V8N
zTx2F40B<&sBs`><YzgEt8|>(c=x?$KT63yl)99aZ$z8Y6d#jow<z8CVW*=?gOpf=9
ztZ2Gx0~m`wM8J~U*<9N)e0^m0cIo}lUeR^}uOtn7Df8Fy>DlDVwl!YA_u8bDiIv7l
z14*qZ@1~DMy?S2+ools4P?g{&72LDWmk@e$&vvre<;9-}nm5#qr@1H?#m7a(ZFUm4
zaf^my9s{lDVa)koEx`X?`u!Jo?8b|4WNUm+q4Yf={NG-Yjb#liT{r!5oxPX5-NvZP
z<6^6kL8aqrXii)PHLJB$5b>gQ#T?yV`*|abRI1JSG3jxx8+v-qnmG{Y>3ctw96(ET
zmNpk|Q@F?^gfr{);ib)}K%mvmRg!AlO2O>Xx^$)7TV}S4acFmoG37;`8>40JbWO+v
z@I~}{dYb<u4xith(B}kSSeWm?@T%C|RuoH{DAyl}tL)tRHl?bz`1O4pt@5)T%?b_@
zChdU6S{pguZQg7Wyp18h=tuYVf<DoO42|`YZv9K5IBu|t0mR13J`7O6zuFFJ-&unF
zh0lPU@lD9-?t0`(V|cPMpp8My9dnOO&u9u?0%Ks~UU({6&L9L$dk-Hib-H(y6asU?
zu*X~kS@UF*!sg+Rmplpx{UplSY_@J_TnnB5^dJY#Snk{<T_;ZoOb=7H02%^PyT=ko
zH!;XmyXlK;2zpd6oFQoQaCZ(7uT#ZmJ)R6SQI>YfSIm~GE^xHkPw)8zSP+QySJHI&
zHtQ~wu$m~4`T`H5vQ26tS+_Nvn)c}<X&ittK>Ih3C0mrm)70fj51M`OuHy9t&KrjV
zFGm1L@i2g#0)rstB%<zp9F1;!JZn9%8BMJ5WzeKF$V9pB@^yxwcYtG23!mlQYtaq%
zpPVTLr?s9lFXd97GYY^Q0r^EwVTMA!71IxZ<XW3Fx#c81p;(ms^pGB>!iS$bUD#71
zqI7z+-%9hG9MD7*P@yhtTbdcYf4t4#?q69O_9Yu*(tL-Ig^U0lQ5yPP0FFv6FIae?
z8rDgnjc|L?;(3<9q*3~-ja#X$I=6h-TY8%^kB3dOkcmWt1KoI4OsPn%<y9MoJqkaA
zKkX&MVt!ind|+^~)5M)RDi=^M7fp(&aoz+fQVINo7#R$WY_^P~=iqPy@+jE4Z?3ms
zy~9=S|Flu+y@y1r^MwR$uD?`s-%|@g1csw3Rn38Riq2;PcC%&I$DeZ4>ax?q+(nXi
z&zO~FvQoe#tXjb!n11Vd?BGUtu_mA&sX`(OzFL~5|2&vo(c|>pA2t1H9!57X6;~<Z
zY&fVW7sJEI3nVPCOKd>7g@4BZc9=A=*{T)k=Gp(TQpi+n_{kLepHwJX^Db%RdsS6+
z3!Fo`f~U>I|B{`)JA5{y;@-M;d4Q`<*>w;NwbDrFi+py{on^s{<w~K_oj}0oG`@q(
zRTqT#oYMw|B5o*u`3~!PWQjbvQ<RkUr3ys=j&XUt*%mm!(H?IcZLA8oJ#TTpmJ?5(
z=P}*m>M0U2X@Us&pfTd!q>8vCCiJDe0cz@MPWuH8A{d3HO%ca$#`HS805Ss~Ffkmr
zA8E2BDRut11SIiVW7S1Ex(h%jsbAeqd_Z0Qy)1gDYDXF51Yip`u!k&W3Y*ZC_brgL
z>=eAFk6$RI#{ilp=o;!&t=NZ-VsSXUgS5SGW6*c_yRxLkmwl>WI_b-I+ii*K=L)$7
zVeuyI@#WA5pR=%$Q#Iom)a<fb7VE2hQJq<qT8e4nUjRGEBE}7%2E!Kxy$9(+&W=ss
zqyI1=v{)joX!a3|K!?;}Che?c7rCwZX#_L1YO--DVEr@XuQtr5=_YSSW8~%Ch9qXD
z)#0?$fC&8X{rmu6w8USkTje*jYjKeHEB`vV8_yF9`Q}zq+VYP0Vc<t23*E>N@ri40
zY=X+vi{LMT>ML9}vuS`P^LUy2-H|i|``p?z1I~a#ij2x=aS)I&t$8nA-DTMkG!R3G
z*>s~ByFoealE-+VRFh&Hr_*DRM?;j`JRapmV`VgkMD~75x@gI0Bj=2z4mTjaZ4D+x
zZ4U}VIiO?>X!NOTjK&;&*wQgJ`Sn|v-^9Gki4gWE=IN4j*65;%U{A%#WvjDZNV+TI
z=PQy)<@IQ0mp!@0AG{v?gNyC9k7l$&e_oCyNV_{GPf9l1?0crzqWO<GGo5B%_EQx}
z9CkSQ>!5BR5I-sEN&WvotZ^5J2_sng|EX%=FXi3I=!d$T4j5eQGwp3oRItzi8NJ$K
zdb>>9*J8X07qrceigM^%aY8#63rsASCDOsrKG2^?f!_{jK(3Ic&71wFxDJZqvDl(f
zzlTshaYT0Iuv1SreV!wvZbTdjV|X_GzI|0*n4`3TW9jXuVNbcZLY<SLLw^I^9a{(i
zc$*a>%iWwOis6{JJZKDw1qLL56H#=L^WczZGBDCE#~VcZucMH}#NSjb?XHf=jb#D|
zM&$vYpxFVJ#W56>Ppe|LhL3}bGM`5KLBg(=POW~z^!0f`#2gtHXqA{zIeAVQ9A8I=
zeVhdtRf{99hI_D_gN2$yIEgWIf2MPj0%VqIM>-vk?X=K5bNj@K3=vtt7T}>+q+0uB
z5IWu9tZMPbAwG6B0ix=T4s?mW##KxV2>Q&pc$WMWdn64YkmTRQyx<o9#=5O>n4J09
z2ZY)@0n*v71Cg8)EHJEZw7jN<lK|Du$USby#*U{LFH<z|$eGKdoJE|vVVTEpEKlK4
znT%?FfL;_-h9)C;tf#1YG7*iIN`pU=@+{-MOh4dHxA^uYsJRt(Bai$;p;TN?!y|M?
z9d}8{A@8|=;?bsVi-3@P=KkK1%n5P_Cb@T4R0Cp<V7vIDG2HtF6<2{`X%%X!>kt3(
zhSqoL##i^BYdymOQ0{$wTud-5N+lq2Fn)&6UJg50W}{B6RgO1S)$B(M?@ofBpy5eX
z5Rdnv<*4Hlmsr@KQ~Z2Z2*CL~ICz7232;6mH6Tik1op&!TXe50f<s5V8d$@<d?gBb
z;Y&glC(S>OgWm*FH2^OZun}Yl9JU1$RaH1Z7*Wz#_2RL~Bp9V=TP*sLGT;En*v@v!
zNMNQa<J!6`FcEx7M8EYaHp7@uvX2HXPTlSgw6Y(n2`V;U8~M|-8>NCUz;#O2f<ia;
zb{F6ZIodU^J912ZsU95LHyXD2LCzC1L_0MF^3xqW@RteUN52O}EzAvEg9XmrTYj+K
zsbUKZguUz+)Ij6|X@G(sbHsoM1|s)r7Ct8jee6$d6*3TI5l;gKDpv^NScs~V7#^jx
z1)MZ{Zn*e)vXXorUxsht$;8}UdE6n8-DKoa97jLU{0-@z>x>TS046Fv?jRX&l~?LK
z>v@PA&S1XNU!iMii2u+=%OunlB2@29Ood#W8!jkPHytJ00g7bm<nfjBZzr(~4l0&o
zOY(XzzmxCpeB+&g7kZn1-e^6`e1H$DIn(zY25Pv4X~ufmm8KbtBt&M57xBG3=Etvr
zg4YQt;-5tSpR@nk?2bI#*qj-d$3jgimRQ^50r~}`heL@vrx-P<7cB(d$fbQjB4Qcs
z=(vZ(K@|gYype6cb0Y9(pBmhZ%}1v30(LoG?^(iwO}SsH`LBRotHVQ&e`9)kWROt^
zs9FJ|H(0yTfv@Xw;!j}mGy7fIaEz``(HH*y$1%V}4=@faK$|9&k$_64Dg_Or)=a;}
zD{B@DgDiV7koe>y#}k0RTe#jA#3m3YksAkuOMuXMvckhX6i_pbKo|Mvh?4kgt!v~u
z#iteQq2NMAdxfPNYXRS7Z~pPJ@4SJ~hPrq;d8e}{Jtx{A;R<%I<Auv>SH%?Zxp+YE
zGaSb$a>YqxMl=wdph!xE^q0I>*;`~uKc_--1Fv+3j_CrvLK(k=wY!H)rs2?GJ1~*9
z-~Yj^ehFT@&uewuNrf_J)L1_=?Md#p_m)j79<5;Pgrr(DExQ*2idqdx>J~)`<ksrC
z#ksy#07TXXIZxyS&k-*Nq|wUVj|mi@GPk<T)qweM5lQa^$7lh`aYF6Pj~e57tWz_V
z5Wt`cYDwzdmWl0BMBFhf1GISO9vsE%Dm1pbC6ySzgKYn-<m6ojZA~j%_1XdTll&eZ
zRzM)+JjxajYLX}yLK#%PAgPe-!xPA1k-ey7^)@-iH3@rTS|=5RO)vup?dza47CSV<
z6cE1X(w@p&su({)z9}0i^?LbM^~BmXCdJ9h)*W9HEfs9vi&Uo>v)`v>wQMXp3COOV
zQE!Q9?P0~}j%;GOpCV}BF9rn%S$5-Z6^J;y{stq4FNU;Gd~1dW6kN$hib`X7U@7CJ
z1JFI$cnCp(fG|MCq7jT_Mxh6K)51!Ei|CE#uK|;osd}_tjyKh06E_GsRqf3^0XWW|
zTo0u}{J7v&ABjwna{f8!C#JwZnTS^?gY$^~CFxoi#Pc^l!RA=F6{?+gG%uX46GRM2
z7n%+klJK6}Gu|orRT9RPLrBzq|5%cM9F!C#k%sMJ+F<)|R)AhTE&YG~L6^#v@0>R3
zH0;FiJG$z_mw)K0z_Y!{w~{~pa2-(YbNjvTis(E7W5J-a(<9!1(e9_YcMi6vLt}Ev
zgrT~h8)cS#r2rD)dgeMRFgjkEX8<hxb#OSZS2^IQb@K%&C{Ms60_haU`BJx^Tkyhw
zPXTcftpUmCZC9vxr@CHYb`KWjq?z;HuH_i-N1$@h{_24WT~p&xl7pr7;K76TyCm~k
zo$b+o)a3A8mO0FKT36%bfSOdPv9>A&&hZYml+wmv;<q%f#oun_jXONbb6<w$aBkm=
z0ty;cQaXN)m*G!-@WKwnL%wyM-+PT4W_7tf>wv?7z8Bf2<pi`Q&{cO@0c>(CPayJH
z6}I>%(KtW-k7;Z&pWs1^g@Hn)Ab!WSKN|EtvxfDaeIAV;#~tg1dM8m<|Gq|~uih|$
zhBRDCT&uxq#Pr-hP3oQI!V_xuT{1rfKhbwImbtVrJeoiD1Dn(&6i9K8*kO@HdvO^y
z<W2{O>_V%9kzPGcOBAH_s`Rh`=qeqnQIE+R3T-C0kOc9-O^K2GzU#Si2-Hu9jwjKI
zSZClxzk18h5$ErbR<o&q4o(c~Gcg?q8^m2I{V-L?euFme2Neq7V-YtOJgmee(v3vv
z*Wxf+%LxQ{+mx7n!JU~!l6veKFUkg*v8Q`Zj$w$K`F!lm0=>_yj9phj`i#7k2=suK
zXr&<Ii0jyDEUWleVc-!BpO4VpG82Fa!opG{(~3z4s}-g9)lJ8=<VE|8veuCu2nkLT
z^Dq7oUbpn8$gLge7d>%&0x+okG=RU$DxIWCaWM48plU)%a@*XHGkBB9&HVPnXJR%b
zbsfA7d<;L*6X(Tw3?v%nn68ZYlS$}}iY4l!3O_>OFL7C+4n*-u%hl|OMnG&)?&jM&
zr;Pyv#2p=MWW<CKhizoYo-FJWWf-qdU)sd7%Ha||2;Dr>)hLtTk81q0KnTY<2NK)z
zG&7t_q;Dx7@cvzkfvKILa?wojg=IuOL}%Z<*P>+8x$<xz@3iliU7jzsY*EN>cZM*8
z(G;lD(tqfC_wm&@P#s?>)1R&ehEGWP0Zk__{3E@bH^0ePK|9T9j7aQu@K~2v!{aRb
z&!I9NeK{d#>egJCvYy|BWtrs@87i49DZdI@d0!nw8<EkD1YVvols1CfE|bwppA=U|
zb2^QS?RM9?_pg{93#F2@OYql@6cTD(aFZY@0viX-y@87PDo=gxuNX>2o887*9El5k
za$z#8T+U-^xC8~=_sc({1^8XZkoQW`8zOKsLy_i`H|p0fk6xWU)zgtJ2kdStU8^2{
znWy$2cRbe$+LxETz)uv=snKR?uu++mWB=-xoW&8siyVHG;zZW1AGRovj3t&$uaRQm
zi58E#E99|g_U-4cAjbZv^ElS*1mfk5<rPl=hCbf!%|Q*en?9ra4X|DN>Z?;*^?IMg
z((H=sh5lR3Q2V<(+3lVQn#P@X3^#f;2Fhqifb@T~7{wIxxhA@%=Yg`~Sg1Iz!aD-z
zJPBrd;wKSsG}2*!64<<>G6240?|Zy})oG}9gb*LaKTqO`8d`n~Y>tVay{-S2pt4KR
z{UBVg(R84TJE6YTMoCrDl&SzxYHX|%vj*T-F`he6z(d$-t@n4M-N*VSjkday^T8h<
zcu}EOcTDRQXAH?5+C8~r&-9rQD*yk>|Hs%@Mn$=`VGoU@3P`8YB3(m+g3>8n(x8Bp
zAT@x55`xk>fWXj5N~0*<UD7#pH-3B0d*1h)_5JwP_ea-a!86Z1dp~>c`?{}qj1~@l
z7!$;ZHxY%}omiijYl6SSH35s&cZUbMe{WrX{dfBd3IT$Jp;x1HdeiUYinPIn0#RI{
zJ*rN!|5hp}4;OGreg?e9cTkThM!HJ~;F>TnTK92e{`JuTrP|V|P*ZmJ%p*J>IV`}N
zN9imGlK6(dONiwIb<+%`eU~0R6n>KmXy^7LKi(Hi3&Vf8<__nn(W5Fix3!uS0sClj
z_nnPrqOL2Mkq*xXvT=w3FB?Bu*cUG|*mLp0v(Eg_oMfCjJJdURbSUMYhCcXe8lrXO
z!vCE}(xhiB;EvB(_`2l)_eldh;HAM@t~B6uP%v&MjbguCQEIIN{^UqWICZ1{X_QZc
zcTE4g{G+Q$t()IQe^dxw1ChD*K(3ge{cMQ1aa#W4B)p@Y--b}&gjMCSqu5Tgn<wf=
zV%OS4LdtJl#54I(nF-v=pdo5Y5W6Y&{^wS3F~Bo>7ZAo5%(fRvT6PclMe$k!C1pcF
z<L(^yY{ImFgvw-P@1~JWb;fg~D3V`vtmLth&{h`Z=e8jy;Zm&wQuh2md;8jrZZgSL
zZnVV))zOdAq~z#kTTvtvl+vpUluf^)NY<#g_2^K}6#pO3!~mgH;-aJawXRrNMXbH9
z5_DP>0n@qE9OV=P09Y!4w@g)bdY@^<pH3o?g9#1Z9+~^YYs0ro(<^PaJ>CNO3gPx<
z9}lZa-H~?bYy9#zws%>)W1js7@kZRY`CH)r*MF#)0q@hrW2*^ApHoKz7CPq~qX@v=
zI$F5)F3>6A0TQ!36gOQl)Uy^v`EpGj1(Ks-!24WZ&%CaDdnseDoh^<MvZ1MyV|n@2
zzfMEd<0uRXG)|wO=s6M04(p*H?QfxEMFULT+d3#5UFSaM<^PJT#{o{cfD-7$QYHSB
z^voVh7SDaJ1&z*_J%+<&CbLki(fcwam1=r;oxOr|q7yVe0SN=~@`>016*O6%=;#Z;
zj9%kC?{pzMpX~uU1D$+>Z#8~#ngUj{e2R%|ytDwGwjXy==6%t2h4k&jMBRi0>Lw(X
z9@YQNI|M}703PrxR4G2NYrRKbD1ud?jF*twRUQ&1gqnHcWZ%$Yzqj-od!Od*HeE*H
zGfpRPE26-rcys|nV57||*VyU5>4z<Bqe{bks;Wu~R1{4$FPX)V3!>^|YLc>zb(J|f
z)5IFUKJACsIA0@pc^{+ps>h=pT}}TP98}Rcp8{b1wUlRE#CNfp4A30lQ8}SLS7!k`
zbJuB~Sk%l5iY>NZsJ}e_e(fDAWQ?t2CR$Qe)9L=#nt)jdo}wA^C;Wdd2UZvZF!x<#
zAEQlcFjT6ydw8&M0#;1GIsiA))=6JY7XBCW2Zm7LVBPug<av@dDK0tc?FcX8%SYX3
zH268wQ%|p=e}4BdE$ZIW^tx2DP@=3XzX+?)ENM4XBj4>q^_I>*%g>J|;06|$3Y9tj
z{Q<YJn4W^4-+9m={`{Zc%|fk>Oll8Jn|z*~sa+gxG*4zp`d&O1!Qg%I^US*6=Wvx!
z!uKN9?`$E|>7)m7zyOvQ=1=9s;Jy9}HmCXbI=UB!H^DTbU+2ay=r|+N(nrFg{1oJ}
z!gqexYfYA0sWea5xpPiVPGV`YE5$z?Y4Y`UadpM#9T4%{V~BgG|4B1P@rnP{zCzfp
zFrI$4d?cGf6peuAo>_r$y-=}XO%x3cjV-uNfa7Zymn{7X>-||Fr&$`LmU`n`cSX2?
zrr}$8Tb5sR@c!NAKXmga3$3AAU}r|SZ43!`pS<jf=c4}I;Psi$rjI7M{qeLnc%IV(
z-EB_-*z!>XS<2^=vHv`8A0@1pcaRVD%ikpn*vUeuVdxoB{<-+%j|Os8Qr1Tcbt`?&
zUF-+oXx*UtS5Q#sisK~zvjsk~(!3Qr1aCN3CEH_rD)^}jIXU^k;o*{Z^{Z+5I1U}W
z!Rz=(W72mRG^Q%;@~wN58{`STy@3>0?k}|BC?@eS?^e02bTzaOMD4+#^>22qBdd5{
zFFr<2Re{C+c|`TgN!j;znHML_ug$SO%hvlFqHM;BpD6!Tjmi9_`wso*TY#QgK8#B)
z?fUYx!a|;!oZIBf!AkcqxbFJ*ARJPVZ%s0y-|QzbiqL3v+y2a|hU=In8;mztVq9PJ
ze1ChohE&SWheNl72F;|>e)K~ctEm{|HN^2Aw@HuIMkY}U4vp9pqyatRd)St`&1z55
zNcAg24!tsl?TN2JpB_B>u_^iXh>eWPAhs_}Oaa6SAC7mCzuEz(npd)1)MQv$!KkVH
zB+=^gKYQ%$biBvR2#hoqwRGh)u{XI``1jPo(<IC!D(Ow)XI1iE=^`s+9y5}4UOziK
z8vqyHINls5cw71UWJ~{GZx61Or^<VUrku=cferp3ROZvUbirL@H^Xl{9vqB6mp9j!
z;&0N$y=;GdAj_SecL@+<wP=fAaklEkzPZ@smZch*s&Y{2pQx}Q2C#A}Eq@R;A=Ai&
z40J5(NjOCWWACf=fy^s@2kfCzfxNyS!T9-LCdL8-25g&7fVQdZ_e=fROQ#lk`4n9C
ztF?WtJZey)0#NTYGwL1AvF$SXXTe~QB@MGzUjtu*Kulahf>PdgLw8k`wuJza(BymR
zLP+hP05a|=#~4)bqOtQ0oyTnj{T%0-RFe3t7T=0VyggzlvgwLt7nd+2hD4rACzCNm
zHMJ4k5KLf%sy6#wum1`sPp$rvW8%I!7Fq2y*SACGFkA|CNB(q>*=7_;ZTm>y&#rFp
zo31#DcxVLDw&w#Yyg1&(fho$^<;c)}$pbU}<M^AUHbPZ$etJ*3N`ca5i&qb`mfXR>
zS&UfqXGxEs8fhbu#aqh%tSi8<K?4Abk#|8NwU$9Fw!1n^#2q<OgaohgI#>>_=D1Cp
zUb<{yvevF5Hbx7{#Z2g={Cf7VRcWN}6Oh_tKSb*oFcLK&q`Qy6a7M?)N{AQ>2BkS0
z-u1AWR8mV!{9E^nBRdxlk7_EUt;2X;xcP44G!1fn;ZseGuw3cpc&XM{k$%wUFo=84
z=5moy?I+||G4=ZFY@_#Z*q!?#_cn%d@I4%B!8y$Z8;k~nJ*wMl|4Yl`e}-0J=y%}K
z+Tb%eQbk_B2*G+9Fa(ckY;3$WYu5kt8Z9EAuziZxW%()k0Gje3vW3BFV=!AGmE-vO
zbWUr3@z;4j3_2x^ByD+rX9Nuw?aN?ZfSvkUPZEDDx2YsH2kidSMp-UKqi`Gj%@H)W
z0-eHpVvh3-gYqE+l%8}j3K1KgQa{?{>|0n-h1&Udi?l(NT1@{fa2{7S^8OYr5z94f
zI3EnE+rjcP4t2P6nQJ#t?=BO7^;miYwJVH#$lLlKy#%cO(g6r;cL7+SfiPS0%UC}$
z&3p|Ed52cQ@sC34B9yA1pM7SzGnAs9hXb)*YKzQuQ0D~hGq}iDcCC<fN#8EohDrde
zW-EMTdGqE?$?MM&-X6%w3fAv3BzF-(RB|BcJJ0v>L?d4_N4Z}n)3!)A2<m@x&bs}(
ze*DP0QmGWgl*8Ui$=Kl_Fof^4JO__d`6By}Y&AGsk=nqQiS%2D#Z&n`yr&mBe;>Kc
z#88L&G`&v6|I*gLD1G9%9S6X-O!dQ6@Rl0oVMV2t1wuoK>8-=~Jht>yzYc{wU&1b1
z`T3s2h<a_}tHsvL`A+@rSvjLzh_f-xeMC@TADi+^6jV<8C7dx1pNQJu-#<hO;SIh<
zc9pV10r^lmPvsG>?*-Wh8Y%CSo>**b*gM&_i`b%hc$Alt>nkF0^8Rsbd%Dcr7{9rg
zBf<9q-FFcrcCw84fZZR!1LzH^6k%*y|Llv>N6%a0Gl`G`EemMmgbaA|7&J__X^}5l
zD1fs_B~vR~bgIy+DM)CpTqu#it0{rNB}_Z*sHmu|A{j1P2*no(a|#(+QnI$Hi#kEj
z2;RYJLDNI;8KjY541gj8-w8abbOA}AL)G&qU?Eb0Cqwn{@p!W>$TTNh*7{UJiD;R=
zRGT}2t%pfhS2vtHs_8P$<NM|5ZmqBANvYbBtS`N7$jK@Ns6Ez>;aU`$la`pE_=-x!
z1ABa!A{d1Cii5F@|N1`f*vx<r;jf@7N3CRAHa#P-`#M?>t-~YBV9;c096L<|8O!H{
z*lYjg0{E6iBXz#MctR!O%pm4+ak@(n&%Oto$;u+&eK|@A6k`ll`1z_7Pse3hxkTJ~
zDxYt&4MfyQQUH11ea0tU$pJq)5ce_13FQ}l1hYT+KuX{V&mW8AFlq2~J4Wg>we|Hy
z1(4a_W_&_0o?SxR5}t#Gz|f8MQs69Rt7xLWr`i00lwG!|sYyI8C52inv&bC_#QPyU
zBBj$1_cSIyvfXY(9ZnmUxa-OP+lD|5WXruwRf?BlEu_w08L)sPIFbW6V=%tmI$Z1D
zsG0Lyq7q|!Ng(n9)18MGcE^6YCtX79G2mqQ|G5^fffC!pn+RV-HvXW+a1L5qu$h94
z$b`F!xJCbnXOKZx{LG>Pv(uQNsPe5OExFBoGDkfxB>UA)1g7qUvjk|>GY7QLVC~(3
zlGd>u5`=}rozKUps#qOWe;zXa7k33?(xMB9x1rzq!eb6G55Sf=6U#(ORV!GduCA_7
zIEfLe1kQAFM-*RDs+{H|J5YxZYx}Q`ZkcpQy*lg-UFXvx<N7hA(4Kt%kGC-#HE)mY
zhnd$Oga|-;Ye|^yWOe=;L_>=EoxwgmP|E#a-(FseKree?dtCX$1p{>++9iE=-R$q+
z9=tMR%0CXO*NxqNf325TQIXhYE<lXiw94z^M2gxVNC}HxD*bDNNCP|n4^syqdV3yF
zC-L>gudLTgZNY!C9*Q%fM9Yemtf4_`4sGBB@GVXpPkM7Y72|-RkyBhUtl7FTX2Zk*
z-j|p;TWR&1Bl)W(;dL~7xZU{)*$0ie%<O#U{XcaXNxRz%LEdgFKfE}!c+XCb&5OII
z$inT?{%E3<D{97B%3dzO|BcVGho(Tm3)ldV6=>%bRBZ+${gi~aC!3N&PTXrm=-qmg
zHf`P>oZUIzqoJ_wA7MexHemtzNi-l9P_DX$di((NgipI?F(a7{YQTCVz>ex#^a5^g
z52y)=K#-(7m#0ELr4T&nyYTwakC(4AQ`cK|Xk$gai4;>FVJxI-SCp#h*iRTrOfQNK
zqm()M<FG$?lZL*Bx+ZMuYn)iI)Bmi0poT<{+}J?!HP!`!7n^0S9RzO#so9@Cjr<T6
zkSyv<^+h>}&xoE_1XG2{K4YP-);WEK3c>%@*>tMToN(n&4qIfR$nV$}HiRRh4gSLC
zQPeW7*TTVvs_3fzH$BgQp)_-3szA^I=~*!Fpiu;j-z=#NscZm%hT3zTd2*ZPQ0WvJ
zOKG1P*4<h=IW8o>Dg?ZR0hCl^YozwdMkM?>w;@F_Jj%OW=qNl;KQg)I7DK`f%@cV9
z$=DiG50M@ltIOYf9}4elB(q}tLo8+`^Xs`K(BsY)!^=IP6Jd(w#Jzq6$XlgpGwlDG
z>JUzFjv@+V0~>=LzWZ#mzd}0^EqurKLaVmR^n=r8Cee+KA4=AAeDGvtwNtsTgf3S6
z2?z3D);AaD5ji$$8PR}>?zXzj+x1C;wQ74Nt$gK-J=!dOwL7f_@=CADNG2-uXnvO7
zZLve9>&D{I!MaogNDiVhBa&uoL4Zc2I`CIMdjp8iPrR$0GJ#43^Y%C3&Es(}Kbw3^
z71Jn3(&YHJILN)%RQrpizWv;<$loS!5f+snQ4FJo4K3T#Y8CNPEmmEQ=LIzVVr4a)
zqfRq{K)y4uh^7yWg<lygsST6=NstwBy0EOv6^JBZedG{w{o^_s2*56sMz@pyruQte
zK7C4<QYG|D{tpb89^XzS`43g*=5MOZR*N*j-aetQ)_*uYmSy9>8+2a+Xok#c)U#Jr
z8+ORTSBL6roPchs+iib_lEWxVz#Tc50<3+^j^COTLJhUrE(QW5#MlALas&}fqEqA4
zQrCZwCT%yAljos$sJYTaiclyzUc=oF(aQ6hj;egqeh;feN*(ZVSXghpXm;5bUqScj
ze)QmK$N$fzDWo@Yk@U`*Y%|aO-!(<1#ctoaVZ`3Aw|{5II5=PaoRJ@oq#ZMD^p+ZG
zN}CqBvErPJQ97OV@!NO&^0%_hd7D(@b%<+`;dJg*Ns*pND0W1uG|=bNoJl&U<3QY^
z0`q~f?ib*_;X)_`6U&P#3EBNVezVQ4zUDv{d-=}`#V`dX73&l|+MSjV6ejiHX>8(<
zO$S2qjAgjUYLkrZnQb8S+@D<*kfKluCaurUx7W`Wf+~T2I=R}Jd3&lZ2ErK<d752>
z#3(QPyhIMkOr^;CNSn)iXw9+b61_HzZC{|5>{RkcURH55Fv)Ku%b2w*Q50W=MljZ1
z*!BlpUJ96K2vvC?o&lCD!t9C4${73O+Jj~O(NQ#-WTDptK%11G%ExW@6DFD3hjDvA
zNz}#Yl0e;f=I)?bKOKRLVtgUENgan#{+V`Qo8qRpU0CI+3;mA|%u|#f_1cNEWB%9%
z5Cr<OFTawEfD@hI8}Fk<jeJRVr=57h3xti>&{sN&liqQkP_ScUGGF)s*+UvLd!Mj1
z9Y^LZ^Oa9n1XkD|lX}$G05gsN7g7dap_mgb(n$#sWG_v|DZ_#x6VYXd?-gr~6J#E9
zCTOgsr{fV=<=$$=(M$_b$#=}QeqIXvT}<4Lhp@1PcXcjuKEB~T0O};Id~8p*Y~X&1
zNEEVW3q!LZ>ig^w6)lN_2%4zYaGvjZpIr%zlE%)D_Oq8qe+NVBju*6LU=my3uY4Zr
zs{hbTtu8S=?ClSh)RV8{P8Un-AL9O{SRQ_hNZH}!{WnVEykGRud%#z$Q!R}vHDkF`
z>ys`qd4bXIbo)`KpS&_%<~jCN2!qZm0vDR(>cwxT8)LJJmkne7ysba_BrY`pHzu~#
z2^+-#u~?byd=Kn-dk)|I-z*jYaTTK$XTjFU>iKmeGGWIVr*fIAaM7EfWa&G+(X$~H
z5y|2jUhgoGm76m`b(MBMU=UTB-h8=&g`|W+B^a^bRbN$2&4qKgjbd})(ig2lc>({?
zdx~i+T-S&FQX`1BqR7R;gf#If2mx<0v17_Gvemh0E`gL;M2s%KP9lCt+;Of!HAROD
zF45JyzdR^+6NVXqFlrRq%|^H^*wSA@$SC=3a(;G+qIVpk9JtR0F{l<da*gn;VmW{Q
z!gQa1$Tx)p|M9mR%Bts6T;{Wr)Y`y@7lqr?xX4tf$GbL_l=cL)Wi#W(i_2)^_dZ)w
zgZ7INH0$|DEoMu2COvpdvc=EI5U)3K$L=J6bcdW7Vc_FEFGmd{$hPPXxw=FA21}Y~
zhSC#<Xy^B@0f+0^rK<(vH39}&>=FzHx#SeVO|Q#U!O?uZR7O=`wCzdsmsd8g5&9YW
zJ2dG5Vx<k;Rviuo$Rsc^&UegN6mZ=zNCx&d<iE!3Z)e^c89L{ue2m0_#OH_{Z%sgD
z`-5Y4oS**J{yVt1(c9yzeTIy2=-GYjDKXd^boYqc{@}1*GAu{??TxxAE8>)LH$UDF
zE7SnFJA5!l=|>`;|7Jg+;FloF%#8sVg2ZvC>*v4F3S-hRTtv|GDHzd(MwyvRfl)2r
z@$TFlCev(#m&I2$rvmN#j@jAt5Mufgdz=UO8VD3=Zhy)DoLr%Dv@ecRH*ANvKYYB@
zRISQ>Gz-+NuJ!u^QLbx!8sGxA1YO|#(C6lQ$fn>qhkmH@JS2{7f2Eb{i^T{+0|kxA
zh8BfU+}zwOZ*QyE^rgNZGWL)~cTcUa9R}RFIHeqR6+jM~Ni}rMTvj07aor~Oow_ZF
ziIQ8{4cqX@q=ERDA(X52!WMGyBmRj!Ms$m<B!pa?m_D8Y7ziy29@9%owlp};FsLv;
zFPgY=h0uwZKs<p<aH}eVaH&iHo=7&Z1a3C1t&UuMU^_p-2N`)!FO|6<31q(A!5`6F
zqvf^Lat!V4jK8N@+(#U$Z;xNkT{AHwkt_HFc|Zw8VcS(sw%wuh>#-Ch81P%ZrQg=f
zpTEapP_7I||MrxdGiOHJS+W?I^a8u}1BnS7&2<3;iI@lIr}m+@f!Qj`Z1a~^>qd$v
zPh!qTB24aY7+;mK6?VD#ANI-5+kcT|5QoPY<J$!Vkc7D_GMa??H;y<HQj54YVSbPU
z{SPE&mXFi%@Bc<~KN7T`U;k<iCW?&-WO8^^KL;;<<qWjlD+4!V8vB9Cx19m^hX6IY
zB{?P1XnVkPsAc=6RVRB!rV7BVnK^-je|0`UD@lVP3r@7P-q&ZU$pdJu@&JI#1!$Wl
zK#c4TS9=LuO$z-7RS-ksAILc61s(i0&H^(Q3@RaS+}yqd<CBN5wE@61lSQRzWD$Z%
zA;Pm&Kl092rY?NE*id0z+I@RU5TKiewt1NVM{w!hzeU0_@YSLn@ic}>oc!d2#AZ7v
zg*$t<yF=@_<Y-6yTGByUgUTAm2)g_tHPHK)#qD#ftd1QQ9!>XB6S%AI=9C(;{$+?`
z5rDB4PTl$uG#G|~)D0s~t%j)5ZOKt*<N_b}2^aA<k0Zg?eQ8j0kvWj)hfYc)1mq*}
z@DLoyfvSCAMQ3j^9>vnHUoey?UnYiYCr5Gl#wkxbe|Vsei+bYwf~X~FJwD_XkX7A-
zAm^dkzT&#`BzBJ{PQUxs+kUu66x^k<cB6@aql#9bu^h=aZo&<UMsN0cwuT;kvEx6-
zyFP%eISGF~_EK~s9-ZT<sTjh3@}WUZkVDFPy{R3bgM2V#Utd9KtcoCX^Y=}Ipq|2j
zBQ(@&eR<QB?=V)R!0B@JXDba$PuuXcZJew0ck%d6l*1cN><_;K2w$M%RkPij2>e|<
z@%y*e_Rh|j;hZ1&T#{G>KTiw6kFv2pwf%oc`X+65-|Witf2^8aX?7;F>iQI!cPp}D
zwWHVSNoMEBV4dj(;O}IIxUf*RkS(SK!b)R5k~D-JEuBPe%Aek{x%5s84<+778u@)2
zY0K=+Sj_)zlb}8$3Ew^XPVn7t^OF6*b7w$?mP@QK^k9i<SK=M8>C5G-C`#A|t}u81
z1mD;CpKsUW07PcXX&|(NNT$#U_!auL1OuI}tI{r=#M^av6sUkBsg6x`iDJ*S5$XXa
zyL0`<v9ekR81n%Hv~DJ{V*rSJ!=!@$N=Z7+qCH9j6lQMvPcPJ2wQ`l+mgLRqZMpEs
zANBAM-iZTlX1hyEasU#_1!w}NTC8bS8xBuw1p)p@z#n<eP^zt7KA+SKjs<pAz>l<+
zGmo_cdx%~m;?(@}3uy}aQnX6T)<=&V?DZNSdgqU5`O&bBx?_+MBxg1l4B1=|L(!=c
zshmKxYXzLhz@ioRo|VjA;<=Jn+rt#8lnUACDm1x8+Aac0O34hJ=fvd-t3?wnD^Fm2
zs*Ms84?zrdHOF)=R@KgW5oa@pesxx-J3}{Xb|4wg8aOH7lqai~S6TmhanWe|Oo(0R
zUS<F^QP^YxEwQe!JO`wq^V!;GrWelP(WvLiRF!YhzS~OG>e{sl^CmVZX)!QOh?37F
zD)*|Za>zY-$!asKOu4C&GKNYVi5jkaZxQ?cNPeZ1q=oXO37xOVXPnhvt35cIbX%*k
z`*H8xSjlzAI=;S_h}1E81mVtw_krfQ+ZrzR;00Ijab};S)kVG2+PwqKwY&4q4X0)w
zGWo*G{H|MQBDRNXKeyk%G6EDWmwn>Tb)^PSsi(O_J)yn&HqPUK(W(AHx581bApa_b
zpc^PTgEBC3CHrwsxAFK)VPcji+C|nj$5DEqv^r0K8T~Jp1O@G6qQ_t%%}jWRRWc+c
z<dVHlcaq*gzf~L7I0|?jSiU`4zf*%J%*tb3=e8kYh}FM7*DM9P10MnMY|*#vL#X&8
zS2azM^LY`xJAvEPZWt^#`DWh(uddgC#vUX(+x8|40*gs>`-w6}Q86(R7~Tu8^i{r`
z<O47sIT`}EYFlSM=%E<-UIivvCZ}%6UIov+me~At0MpV!-NL$~Zvz1I`S9Ye@bgW!
zURm25-KkTcSS|Ad_#HMIXpJm?s!igzaXBX&lLXBk`|%Q5m1KeUYcPjsvM(EE)$^I(
z)D8Q=r-?3g-n;)$9S1tkOyBH|Ga{o)V+m8H6AM&|-^+?3NCFP5yf4>>${DE{TgNeT
zDA>FQ(IvT63~K(!8)z+|b*<Z!FlenIe;v*MlFeS$$4|tu6}B-b2}kjj#fSX6B(Y?S
z{AlyouJXW0Q?)PEm#5N{y9p~TX`T~<#3c5lh4{-09WiX(+_6l`@Af%{9d3i=1_k%}
zmC`vd5Jpx@8N#mM&5Agl)t=P6_I9^wX26VymS}AA1Exu@(F*UfDE16c%|7iI;PuHL
z=W_>*)+}+u)LRT#3j<%6ZBR;;)Ow>d(bw9#7Z@s`=gQVv7OrbSLjuGr-`}|g!qWGK
zW#Y-<L}r1jBwx1EZbT1KnukP2G$y5BMsk<O%c4vW1a`z;&N90emMDGh$%c0-{;L-q
z&ipxI<-Z2*5YZ{(y4~itN&9Y$qAco?)x~_j#AX>4+}b=$Z(;K~DL;nt*dbkE5q#I@
zJ9CX`A^H7rCx<_hrIfIcdRoSmhW}tdp!*YZ3n{&)3Q|{&HimVmZp6muS9C*?plTVC
zD+ga@YF&pxL#=m~)$;Oadpe;)Z%_6Uzm4jn(fz|ies2P9Zf+_bGss*}1Y-++tP<L)
zS<lt^A%8R#)Qt^;LHrN}(%I~QPLyvDa=f(vk8z#bdtckiV@g@tmiw%%tOXj`*r@gf
zXd)_uBt(|8b`cQ~iZp)dI}j2GzHAT_0)+|hJ){!%<nbp?;_|&XVvPMFXgBn8OPLmu
zCLalGR_%s!3FrLx7oW^>O4C{z9&b&4{<F2Q(Fq~EQNV?f_YQZw9PDklZ3bz22xjp-
zzh$^N(FpIG-<|=OX40!|rMwnM$5ucJTdC;F#-{yfR-K~lrEj<07Aes>;<?&~GtMZs
zzMqW~%cnz?-T%V}O-#KY2dmuBgf07JZ1BPwCEwk}{2kLXab#$|JA7vOE61^7+NyA2
ze3IFtffFFJ!V7F$y*HA(_<LHo%k2DCiVSu2sgOHyS8bVNY#031**Ldp9PhEGkcDGO
zSIR6Kk2S7RPa1KXx7!~&X2nn1YsDl(O;qXkwZ<``THb7^>3m9&cklQ?M-@H~CPq5G
zMcw9`(BZKcJJ8<7KK1*llx_w5;-?|wNrzrz{gr&DSHW&{%D$#>RlT^+m;dy<057E(
zJ%$izJAettq!fW*@W_Xl;-RlLLvV2|8zJ(EBxj*~(&|6n-P&00{Op4d=|Q!PI!Q+D
z+LikuJs2x2Fah4taGsqEp;uHk0}viG4^)RIlkmmPOzraGqAU-$Ja+F`v7udGs%RZK
zIV?YdlK-yqr{hdj<Ve2PHP54v_S@(K=#x;`gEq5!J|Fz9k9D!xc6N4rK4QCrziZ#a
zGI?xp>mKAbUpAx~oT;pZMr1Z<(%mmlUimHjY#6ZH{QUNOJQNCfmh2ycUAfwuqEKv5
z#b?v~!;Nt42sF@~x_W*H=sjrY`~)`WM{2p2=2$FIbZzQ5#Xp%Jqq&mmRarJ6qpMSq
z0x#v=NBzf$0-$5h_1gXcpwUuv8OmkS{H`x}V<7_48-SG!wa_Us(yt*0D`$4ORX36V
z);sy2CE_l#qBCW5{7T>u%h>@YCZ;n>geb4ine(IR9}N)4g6G9u(wHE_;5!Ce@X1uJ
zfdfW4WQf56>o;QFMytZQ*9lHk?t@3l*3veZJjlC>!-k2#dTK=hQSmxhzJ(rl1p2ND
z`ghYQK_fWC2(0dFe7vriec*lZEN1gh5qX{Eh({~tVQKq?*qu(&r=tufU<eB1CnhEi
z3>o_X@MgZJ{J9^i(kWw>@J8N0t;)Wsh@zKlxh>+qAYI{hOM~G77Q}rhw_CcIoZHGH
zUhk;|_Ng!6FKDRk{Lf4THFMZ+C&7tKe!4Dq$gpdEF)O7mzKk<J*U=LyaTqT#;)~-l
zG{Yh1Dluv7lEyJ>pAiA265}Bj49g4~h6BMk{K>DJB%J*Kcf|^(4O$_K2i&n7nd03%
zS~yFf28&`O<@<1i;f+mC-^z#Oh83hhEdFZXfHsrZUHu}sh&53iV;O#gXXV(zz(<Z`
z5W`7&0^x@U7%=eTI)GVl0d4R)45|1M;Xco&`^L2-C=aAI`j(C%S9^HHQGVqg-@25d
z`oBV`{%<vH%=y2%DY04%d@{7L%-@a3Cm-#-_zIFh5;$Y|;<!!I%_IEaIu~t`)RQEv
z&pti)z5}e&!|}^sgH~qutFb!<*tf5o)cr7rEqq(eW5XCLY<6cGUq(cDrAqo5ta(G=
z7`7I;?&|VbS~jf_6qcEAOce*Y5-D@fqvfu+a9<xtB0;9=_Ec3qXxvqLpE_bcgcv2#
z*F}SKr~2EFK26Bui{IVq^4Q#rzKCq6uA;bj#*{pO(m%gUg~Xft8FVI((jBE?-$NXx
zy-R3E@$di(1z|y^Z>9bZ?JG<b&xackbW#U-fGbUI+5Awi47Pv{ftd7gVYv%fnEIAd
zzxs$KXvPrnkcwg}gcK&d{te3+{VB#AMXwspZ5FVJr@qJF36|Es+Z6{9fzU)#R{CCg
zJR+7ozYBeu`4bOj37Y3dbXZc1vGNQ09eT1Uj+XNbDY%oo7uM|;hru<3W`tcoARQ4h
zw1o5X-aN+Ga6H11@8AjXZjp=on{{n%Z(m1h@U<k2G2%zx8o;r#VN*!~<bg~jvK_&(
zMgry+>zdfDR_h@L#n^;Y&Xja~k+&g+pnpA5OlB$<Pu{|?Y7u_t9s;9vieATKoEW?9
z`<?v$X5`D8?7oI76<w~cEK0~=cPY)bepuzY2=g5%)*e6MoloaZ>++FCx`vAlXw^%*
zBM1u6QC}?SO#K%+smZ?A2xk4xF~+N3G1q8X=CRm}^PC_T&`9Wdw2n3Si8x^t1^;7t
z>s#reYZ&l%qzWJDtg=m8D7hexX7Ld)MjmnR${l02ZFDo(cfK}|DlS@srZDdfk%k#U
znD^FXgn9vY;+FMK;BK|LIe23f4i-OtSfE<S*Op`3s*)s`43XD?y>Vz=aE$WE=$Los
zGIQW)w^`?sG(AL<qwPt=mwKB`sx_mkoVECpOtA&xc-uBx()<TzU%V`&1O7|253Q?~
zKn_C%qQGTPIgo}0m@>vT+@23)lv<eHt)7xlVQfdAh?V<FJyf^(W<%d)SjIDIH*i$X
zWh2+(0t~!w6H<vvD!Y0foG}`MZbsMh0;=3gxw?({^t4!B2caF9)dUp!#XHiMe!v4H
zyH`H0N*Xf=$bR*<-LWy>x-rC@lJBQCAPMFvcmK(qIma6cOwy{&PS}J=f1{b=8NuKx
zFB~IyxDnBjC}n{s;eVn%ZY{}_L95Y^w{iIXSh&!*bJ=ZdSCsCHmCxheY0dsS2n%**
zE1G}J?`k|BVuZ!Bulp%Ro;8@{>zY;tDkV3*Y#DfuAezy3FmfSj5~B=8IqZ|4gTz;3
zq_nxwRTykJ^ohIYhikQ!c>034Ez~V!iR9GS>F#TNv?p6=R|65DD!p2Vql4wXvW0>K
z1^X!$S^;y|_rviH(WB8wgGBk8a)YtmA<^m_A&%Mxr+nK~;;dkFx1POwh^>U#l3ZBG
zDdD*Xlljag?-mSIAlhPVew}SQThRM_a;%RFsZ~*(MnR>Ij_gSjN)js8m+75y&*CHF
zaHZb6uCE^MknzoTB^GwE9U5BPOa$oKpNCtk$WE^HLF9GK(Hy##D_v1(`*C4{cD1>t
zVyZxrXpO-URRA==nAo(@r>}0d!6Q!G+k^2>a3S0GKYwn*sD+!PX@j&*gD&uq)6dcG
zNhI?n{+px}qY!`~a#t^Zr<^p8zM=}aEqEtXFq~K|Eb@+f_7elpW~-Xz_hl6o*+u#l
zF4oGn7%QghSymkCB`ZryBwJ+{>h0RkS_U%ZY9k0&yW(+S0<inB){L8-&s%8_hbC6$
zD*Kc&<bOouY7g$gff0gw(gz(8Es(#kw!9|xsV^O>npAF`chV=pu2h{`3ECBXY`~VS
zyl(d)8M}T+>f7U-iPwumrg(k5DUwMi+f(wdwY|6Gxu!JFIRv#s>$peiU8xK7YZx;v
zh;C-SxO*39<Vj|3*)D9^ReDd$E2i@aGONTJSs6C2DW?rY>F%8_F3%QANPxZ^+9wWy
zmTBRRlNXIzzKt96v&JB7fXRNQD`snp8dFT_8VzCwEZ*dwea?eRa#UD-kH|0T{S8Fh
zQ5U5zs**<rbBGflNkeM>4ZwotlU$tUvgAYxhfXP(Wyt`q*@i4Imgb2(rjlOa)XigJ
zRy-8U4)s*Gs)uSLp1!X0Te-`XBo=fd;`*t4txGP_5UQDu9;{us0cOI=Ex8n6L8LK8
zQ`_-s02k`NN*YOr>|7FXnAWhOXVK1i-=D9a5(WI(T(L1OcwetF#TB2%TVJZc<rHfK
znsY!ab<1m);8=Q0wF}3Q=f7M44R<;K3@W{Gn)~uByKx<4BE>$ElgHNXAm9@$1#a7v
zTcd`_rANOjd@BO)zV8^!$3ZD4W%y3at_!TLjtz(+QRe6F+oW~5>16!Y4<#$~xZSWG
z;BU~F&w)OGN=LN)!bDQFqn;e#h323)lV{Ifhw>^5*+@xodhQ}eqOd=vw=lZ_THpg=
zR)vFuw65s6iMM6zz#dc+c40LCHZ^E<8+v&B-B(iXq85uQbxI(Y%<$x^Y8ZEcK$XWi
z(@43mE{Mh8%<vXh>Mr6F{1}~1rZJbGoED~Okp3_3kWittb|_S+o_9fyg3oq<h}KFk
zUn`HO{i%8VD43H+8(Jr0Ut@K#r-m^|zt^)Byj4oft`=3dSzI^2I`}{?)`07zPcGf(
zD~|BEW?5dGczC;~@}+`J6n$n+!DBYFOU7dtsdROw;LV|et*Be~L)CxF_vV`{-4k%=
z0?nN5w72CogA`cAZOUmXqAn{d37wT2L!Y&rh^Goo*!GJEFI9QiDApw28N69n^yiol
z;RU(qKh;?)mx`x6V3{&4Ls$YY8l<c`)A_@+@8i@C14%C;h~w1{{BNhyZw}IVA`h0i
zEnWP0UtA^7di?2ob+9!WTre@o8A&y4R<q-_Fk^=td=ni==s);{x3#m~>5&|n*2S;U
zZUQ+<Qt2+>8c1oq(SBZ$htSO%tMoZEIanV|LJGgi!H_aG6wYEPo^q{Lua~c4GtFco
z$b(1^B>P+}jv76$a*mg(Le4+l;J03^MH`#UKVY-uV-&-I*tf496Zk=Vh<4|S8K#>t
z_cr)_7*If5n@Wb7mT$vGa-af)Q%yU64DtQiwlgH{Xrr~==?G!cF7usB%-(n|3gwcg
zGVKN@Q6L&bTH))QjIbEgxV#_qLd#um|M0|hqZ0b&m0112{(T(a2l`;@+n)!orptg$
z+X3T8^K7ys&u-Tw`m!ehF@oZc(i4~si9pCjvVH!6pruZ*Thx$EPY>MUx*+FD6c+!4
zERhOeLFpzIv3;XGQ~kudb6TLy<SEInm1oYjq@MG@p~_*CC3{oUb&<km#BKYx+V@gr
zR49v|<Pp#Dxj>#cA0SEeo7a~}tZ#K%RC@fnJ0|$eEAjA<g{cJi2i*Z?vHz4?ExCZ-
zVv#b9e`t5ba^X!e{9~S~NsLV@93d@Aes_mUr;AMEkZ9XgHV`@h|7z@o+~7@AjLmPM
zyxl?ddTT1pWwo=vdBwEsEhoxZ^!P{B@0p9kaCQa={1!R`8iUo*auNuqxM9#|?O=Of
z*b<!`C=P>K<o0A;1&6!je)RpdDBv&&?c<YSk;&UB5uc523RoS(P&f;*YXBJ#6kN?0
zh;o1AbB^z~^PInax+0KH2LV)C33NLwYc_iEmU@0VWlwprS#ewLwa{4#Bn=T$J~lx4
zjbk5qEWV_%gl@F=ZtC9ZM`xgzD$RR_z_^Z~4a(>TejC;uXnNJ!UPq}K+}lkzhn{+t
zQ{p;)z%(?1>H2`v@|&%?g101}GsL`13Z??_2NKdOOLk2cdCl?Xk)F*loi7kLaMc%B
z<`Uj+nN{^s&L{Zjm%p=jrR(0T#dmsbl+xcu4xLvl3pr!v^v5PG=#OpHBGksKIY<yR
za>SUB`|zF(5`~dT@vhr=XwipT8x}6~%e0H4RDyOa=JaUaoUN<8h_{9#U6%W@`Mzii
zCBN?vy79xtaR&>&xtm1mWxb}XRrFl91#UkzrCj&D9?hvz;q!9>?h#5+J8T;-yTf0S
z{fWyCZgdq1PPi_Qn%C$tkv?k2E0h29r_4xDEV}ovVxK%F!2$!^0ks?dCdBpoTMA$_
zbQLgIKyvtRkuWoI_s`C;tt<+`98H?DV0y~5?*_|N?iK%5?cM%^ac`=3MLQltF%IIs
zS~BxAe>iu^Nrx*v;flQIOF9R2j@qBhf-TO02?|OoHXJz?^<jg#<fagT*<scK?f$U5
zzBC{RmaBMM>9Xnfs8a@F3w%qHDtMK9iC1(k%yD^-A_IP>PHWl4S&^ptRnbswgaqLb
zLOS<e`_&C`Y^QW2Q!XQaz;YEr<7MEgp}Fp&6_`oTjz{`D!yiqW-#*J9`m!>_Ply!v
zy$Y*RcxcFz^2k)Aol^a~Oj<U`AB0KvK_Q{uF^z$m;xLxj^h{-L@b0+e33ihY0;z<X
zL-3#5ilCcmVQ+E)U8020!9?{E<UT()vc?h_nRZ<lmY<mDX0>ljUf?%d0B$d`?olV(
zGi{WrQw{r+PcwM&<?d@`;9Oq|yn_O>rL2(AZkJplK6peiPSSI*C*N{ugZ#+t(YHT1
zq%=Wf`x0r`>lz2>`4D}Tv^a#!e|QLtamu&e_59DsA4G2yp;*ydW1T;<Gtn6V>~Cm|
zY*?w|%dvLnC@iN_I8oUcZzSQGu2i6O#j!SEki*ay+j}eH6&0^v<^}jr!19z|5vk3l
zDA%sa<T=fdF5#stb#rw8G8~d@YsG$+j_n>DgC-Ji!1dEp=fm-8FK1sFZntMHJZhB{
zeMt}CmwQYpf(niW(`8AW=fRlndzz)^3%2U4BG*Ta7W0`eOfxB3@nUlrqrHL>-o~>3
zQm1r-@pM3B-%~fJMGNW_KbAyF&4iXOcRYKgo%DILWkI{dMbM8O#|;o@{e;<E;eD+*
zrwApG%jK>;E57Y1>ACNuQNt0(p&MiH#%=YJxTQFuMiXx?pZfr1QFE8HLF3Zx1eF(k
z0s5LZ1*Ds1(T{X44u}3E&NjVHQv&~bsf3cw%ifyk$#Jl2;5e1@i108W+M1LBPAp6m
zmYb!3i<&8_kPBBWX*gH$qdPj)3W8Dz48m#Qef7dlktOX4=gcddXZ2m;$RFLI{)3z=
zHo<q<ArCDw`uAjMw_Op$?ko{h_HJ~0EdM{_^)&F{ljp6wh=cpW&^Jg-LXX|PB*K+g
zc5Xg`c?m7G@9#dh6S*c05Uh-rKmQ(*W+=ol7Y{jkaB#5Rnnb#186ZVqQdZCTF$uIG
z4#J)BSH>~c&GC@%N*iFxf~OggBc)@PTF4jQo7T+G8c`N;s%+&s|HP{^#CR^2^v8W>
zNySDZqgu?Kzd8)*QhR*7f>Lq_j284(n@VcoEx#O@wD}{Y40fgH;V=Rc&+m{PNGkjt
z9<wwqp){Igrg%xs8V|J0TkJfF0-Ok*6ljkW8}s*J%A5?wex0qf+g6m_w2$3O1W67m
zvYl~xLq?p<$Q{Y(;~O1!2n}3u&hKm=CtWU8WFG;1a#Lx8c8sQV-rw~xt-MPoC!y{a
zd|hH%B~WA4o}re1xU@2<;KfJ#<w?bLZy<D{FaodX`|d68;Qib_Q($XMw{{qKxUVn6
zYiZi6|5YZflTbcW)wVB&9if#cL-HB*+KtSU`S?Qc@}j6aXEi416XmH5`vZQ9#o6tH
z4$7nf1hLeJ!<k6_St3Z;R}#(rWM|?>8R?Bm8<U6{t%bTx2J)M=JzdO7d!BS!M>z=}
zO4#z>M2&J3_qE)RmE>axULuCOS=;7h1!uDWD=_0*ATa5uN|28Yw;e1|db6XKlT>4$
zL&bBSZk0dDlt&|dxLH^CQNK*f-o&uO!|P%`?+96vIKj{AGWSj$X+=nI0}i+oNwQsa
z^xEVutu0epq9+E3F>}P}x;C*kpFl)fmMM5TN3JT1=u~h#hg(Xq)v=oR+xXlAhx_-~
z?0=`K%jRJ5xzC?#7rBY1)3=aAD4A^1eU5gvsqKe5gS(Tk3pCk14;cn-e8$HjK{DCv
zN3Q(upt{`YHpYV7^p?24!ZTpq@%C(7W<4({cwWzbwWIYuFf*w=Bs2SyerTgsaT<|H
zXCO+&7s>LG_WP-{lXNDkE%KO8ko(%E^(Q$@?QmI!<!*Vax;6Hat=sjF)9A0mxt|Y(
zmzcD0#^0^dPzFIM`K+w|&`Fb8WZ=j(Le|UVAFohVP3GaK<E%urjbZRxd9s*)yt8hG
zjld?(o|nvkeDET@dS@E<6_;#8BSWr5PwSB90=B#N1mqpcxOsi~r(XO|KB9w^m0)!-
zOjI2Q5B~5LGe~jjj4&7GaSM}`R~TCEBzy3_KM)Psa`mm2gWN0Iq-_5npB_dEj#@c-
zT`-Cp&Q;7EFBHuEEMEEaZs~9GvHd2$^Q&Gx3X)f;`$dnt6rLWA@!CXPj6(|G1ngg;
zUbY?aA3r=BfW%%O9jY~udt$?+gRF}wv*3ng5b3O+&zdOFtsG(eD3`xu-|K_8%;(N%
zOY5dDe!@-t-kXmd@Y2WrUO4Y~ie@yp`a+Soha5&+cAHW<ldxz1E3!J+<x!D}6wf2+
z<^JZY7h`4Sq6#4xDfJm_t+GGgQ;%Ftjl6}k`h-BphY{tRTQ&DUfzoT>$@U1(kuH;>
z%{~G6NhVkFmHqRS9-|axj}y}3-KPT4S7p-%N_2vtw(lSQ#U7S^jFLNK#m2AwgG9o5
z0hPGjy5{*k>mb0eMA88Fi#UZ}^JB|*|2j`Rjs@EilcJqh)IT3Gf!eBGf*>Aq5EfYl
zjQtdum4M7JfDMjPRFysBd-AXMX$JK^&0OBEAi+8!!zCO!*R3iJ(RajK`FI#)FY{^>
z&7bUSVck37@v&gDZgH95+<k{4ESu%QY@C<#1X?fOcjQ3&j?|>CNmrj`Hi|Mm$UyK&
zwV(@wZw?FNd-u;xJ$6EU^WE!&Q3Wono&c!lomAe*1dn`=0iLGd{g=;1pVw=>(G`Lc
zy)tW{%E24hvf5;SbwQBXDDk7j$+#ICh*A^ru1p^G^uN^3PUU+Mr2@=>7aWblfQ5XF
z#S#>_o|*$6Xv8zeM0(3*`NXv>!||Co>d7rHX(yW<Qh><B6D6)aN|6eWM?{$#jPtmq
z$=176y4T1XLH(m{&exnq?Ft#V5=G;BKN;hA25)HKoNj%I(69J9$p@9U`(67(AaPb@
zAx+ma!^yK5b$kX6KwYtd^@Ev&^=~!+q4crcPF>DiT+h&9#ZOOk_lKET5&t5E526%^
zgKsab_BE8zT;W7hru_i6kdHXk)raP9%p`plFkimSp<4^h4EC~>3xWd6qU^aL8-nfa
zJ06WWa3X1nDLY{Y>IugykYPR2^uBQv^t!DFG{p`3W>EA!<nJ89;yLfY3fqUYu6ZiC
zDh|)pD?HK=%E4U(F(<oIs`qh@bNVD^zD3sMSX})4dbH9_js*D8@9}|i&Z4z^*YFEa
zT5{^-pnOOce?DV>J!H9%?&7&dwsVVu;P%~-fb`wD#s#4H)9*|h^&!KQg=0)5uY_YF
z_1<b(ivKgCKrn0#Ixu<51r(VfAXGuf1YsK$z~-h(KsTt)V_T8Su=)W#ea5bYidPeu
z9~2rbG=L2q7TOi)Dcy7v!2Jf7g0b26Fc|<QIVR|H1@~4>+>YiYZ&du2;<pi~V&r<#
zk7RuSpfam9N7_~KKq#k`L6F&39qGVs*69Pt_4hxHHyGPIx>M`5jfVnLqVjF5x?{rY
z>WQydDsU{g()lCDN)6<1-DS!88@xzg(u@-<*RmqNzCJYtF8`u^R5FucyJ0f1plA#s
zdgJ(8-MN6uePi-8<<nj1v=_|XL+`V3YE?Y9FH}Zmp!@FJrsAwpb1`lwBd`13+V0M1
zMJ63m2gl@VH7I*cl|O>5mQ4LjVgQ_t+%7i*s1x=py~e<Sl7=h;|LML*1SWwgd7}L|
z%g(!F0nj&ZvU%ksyox)jDJ1*2JH&&$iB_S{wzt7Km)$rdg1|PKGMj$FKa?Y<!?#R$
zH^UEtjs4KL1L9CL4&f-3W>|%_)E2zd4aFm4S4BDgHQvafwYKmtKlQ>R&5Be^00z!H
zm7-opx2<&6TK)?oJ$H}=C{oN_wy>jv6<7PQ-23iMb4FHsnDMvlP(ae#yj8m`arCDP
z1k5Vwffop6ddqE6V3mhrlzCgdyv|(>=10&^ym@7^n!4uBnXtF^3<Cy?7c=`ar1ovP
zMy^GK2fXG=UKL2Z6}KPB;)*NP49a_b+2)=Mb^Kya`6sct8o&tW+<JC=S_=L<XiDS*
zt5t;}-OAzT{J8^y?)P4_Mf|d%zYOrj`HAmHH|N7sbcWSA@-^!l-~XIm8So}@(TVBA
z;gxp7nxIk14w}nuJ2RqP3EcF*8@&@iDyz!r?p&UL-O$A9qLmH^s?i0LFG!iz(g;p)
z%SspI3w}}7FW+S7aPF5R@P*cX<A79TsQ=)#`6YiqFZ`h-KfzrgkM8JkTw4Uy%*Xhb
z6Zu^=pPbLh_}ur(cP1<F-2o~f6Z97<P+s9A%|#=2*zMI6*sPU*5Jy1(FS1GOk7f9<
zwDR`i0sR4mP_k*re#AMNA;s-lv6?)(0w18CITPr~CYNK32tG*0UsFBoUI(Pg4*x4#
zDt>pN6>jYZ9TPBUsc9n<tDvxS?4(xvp=)#|3X0mpGH866A?o)ozF;UDwjCO4s?q=|
zN#xPGSeS>?^iS@!#AeBn$?VExA@hf#9^0ybczS<VW=Ffq+PH6vS5VFNMc-l7LE%RD
z^I=dS<L<p!{<#s$uhzuiDsY@?<aMaEdEsGiqMvV<LIg<ojNsE4d$pYLm^}^P1bE(6
zp4Y01$qx}9`~IH*l7})1K*|`R_inbiv<?XwCNEtr=YWMhr%h!87Lo#XJ<bKg@cg<G
z$@Ba}Eay=GrP{>KKSVo2g#_PRubEzKmNs|ToJqSsuNeHcacEcYv1ah_Y@=1-zhIa1
zcp`SadmhrA2Ju2r?DHm$53dk+s^K*Rb8ywF&nsQfil$M6r!t}aHXXMA&Ds8aoBK(1
z^y85A$~VaTz6l#_&nd4qN^JFFlj(p{qBY3|b)3yMF9^l4Rz{jZK?Bikj&EL%mp?uO
zvhDn8@qejjUdp4{*w`eCy0YZ`Xn#)<D(Q134b-*#qEP6k=H}+!BCUH&coWI!tJ1VZ
zl#?ivYHjXm9Lka}DhVbsF);-*%owA^+JSA#Nx!zcV?_DK{h20(?Nv|X-YC0R6{G?~
z=DxcdQw4V-)5Sj@aIj^{q)~qMVK-m=S(h+Tys0}}j#XiMazKa-uESp8QcqNB@Y6{`
z1I4bUK8;t<OPLxE6&?8}q9M{@glOHP-Tv07@Ljodn%cub{<xg^l|e%*)7YH&?RuVv
zGAuD3#BF7T)3A!LBaHr15U>p>TRo;H2V1?&Y;4Rf_QQ$yie?o-V)<&%&o+;h{1iif
zowwS|9Tb2MbW6Je8fQ1qv451(<vG|@8vRV`n;v_JNEonuXOCV`2p`|(5mmFqOgi)-
zMD~CR!pdGebK%;UvV<apT_U=6aWWg<d#{BaC*UBB&KE~BxWJ>%JUUYlUh{d-h0pU0
zUU>`t%C<S3C6h>+sN5oY!^?KgF~k+csef4rLU}gIe@5FK9_`B^#VkoT5==K*NnX8x
z=|L!Ap)eHyKnuR5L*qQ|E`<XeY4OW5l@j`<r@5;+rKL9wJ)-@<CCTE{raK0eE<*o6
z36E0!HJNjz(@4aWP|*|4>aC@RLa}iR{1he#*NZca5KG)1@(k`%nOT-KG&AZLfEzH4
z#@oCHZP&k#KD}&kt4)eWPs`n2T8CRP3KgIZ$2kf-mrtINb{X{Lm*ZR3FkBGQKA}w>
zT_T`qbyQ&MmwtcOWD5-!at~~34C9V?V>7q19zoEh(M{+Yaq7N`@+-O+h<Ge~qPT`w
zu5XLRR!AHzO@3|{Q25Sl>1->VB~vj51<m1i;RK%+bGlz+hxyGjPyKoY!C=6IwAK!@
zifp36+IgH`afQmuG!@CvS^@`5@sGfeuejWuMXSc6mm*N0*f`0wLn*$v<<z`)=l9fb
z^?iRIi;2Qsk|fntaVY`y_Cz|V>mz<|#CpOU#4|Quk2y>s3L9U2EGO={9TS6tb#x&<
zMlv0(0d@csnAGW94#u520;rXsqHG+1Lpgx>yb13EKHr2^8evw7&E4dEyfpYkAhWP6
z0sH9n*mwCE2c&p1C}>&vs{9~wT8BeY5YSReJ$#Ale#IJ*Db?(^qKq1=&h((<v4W#s
z{fD~wR2kENV$IL+WWK`C7!3}90=Jf7f_x3WH^&E9E;~!+E_CUw?egntYT)(ds1g5{
zI{M|a0HF9B2qC0K$+Kf0JiCjM|MsSdjmO8cIo!c|sfs$bwO}e5af~C14W%0FqgB3o
zoNAPpsz>DZ8LH}+uW4Ix{~3U;I0f;s#^T0l@?a!AV$+e8?~fiN3A9AQXYf`YLxgb8
z(i=2$R4Msv7PiuR3VJrjdN<#>kXzzs{>0IT85|*Jk}E7gR>pTzXqgQTvs%mtbv_Re
z!Evxh)w<dTV*n`oAh0$h@BPCpg23>n?#;k-RY`(!C2${a&b|j6aJjF4GQJ1aGC8yl
zXg#4vLhib~H2K&r@_Ly;jut{JeV34}m{?CfR3{ftIUh|uqL*Yv;6Ece;i^tFW@<HG
zH2XSCX&W?_C63bP)j)S{U>i;Gw=?#<)$+)6P;*odb&J53A;AhUCT@^RVI4<Mp}mSX
zU7^8ypAr9W#8#a#pQjLf+~WbQlXJMy#7X=Hxlw*K%>MvPnC_O`RD`yb2*c5JF99s~
zd4Eg$>j^0^!haaSs+LX{)C&bVZIBH+AXD_+@e#=T!CSFfwm<TNP=|6b+pSML(+1tT
zD_ck}z5hi_=I0a+nHirw0*pykj@Epu-}Vb3(2GrA4anyX05^aT0c<pUAR`47O4^QI
z|LSp0d_oVIgnr7gP^#y2_^N{{OJKr0<+NEQH2Ycj7|UN<u?K(#>l=ZES%#z04yN2}
zkoOyo-S}rl2SZ=1pY&zP2!Gk;u)tQgT>e72Wt<&n3X%?D01zo_!qJTZvjjX*EVLH|
z<UZW?>ctPeCm6)mVh@Tb7iWXwIin98dJ^llVc!pku>KEwU*T0%*S&ivkw!X1x>Hg*
zB&0z~8U>`JLApb_L%JJjkPbn*K@lXSL%Q>>V|~B4<NgJA48}W-=RJGxwO7nF*PPFM
zp7#(mA;x;|Qt~)r@>n|KElyNIwlg4ZP`nd^hx%Tw1IsXpFF2M;#rtz4r~u3tkukYP
z<jsIql9-3y@7$2?i$fbpvUZq5*bhhJk<TO=5h66%!82U^oI-<Es`cJ4@pIHnfxz%A
zBZibddE?DEAUMzQQf>%4T?mYPvr<nPz^8)v53yX%!_7FYg2Y2b<Zqr)<UZuh-c!i!
zs3MPK3YG5p@9WoxcvwQdDy2_-w<iFEGHKzNFpkf2Wj-g#_MZAK?fwZxX|=6CN&J!I
zOA8^}>C0so<#dK!pmH8r3_~SYk`=x_hZ`YRNOhj9e~pl{{y=gFqQLdVb<9(ju?_Z^
zqxKd=ot~mO0!s`~Ba%~v(i{!$m`A>%Jed(iS97T(ghp>Q=H*$_g^d@d#FE0{(!Z3e
z2M*wuOqCS~cwax6KuY4XjXT3e&Dt)k^txTe&k>%NBcvt<+=thqPKHM!W`^Q{P~k1@
zbSOR3A2ZEe0BLs2SImF{Z?IWOQoieIMlmxOpilZpvxUVhN&D`gIB($qx^Xmb7zHKt
z<@pbkUL?lqx~`4(r>On$i$1<?KqR%@lisuvjSf=C;Wq8MS`h2sqRz$*Ypm`}_zoXK
zj{(NSpdY2J41KtzmEKyc0iE3vVzvL!Xs12nURq4x>Go$s8r60rPv#9Oe|)X?Ka*36
zV#Qr5GbgZMyBo^jq}WEi2{{GKYuZEYW551v<E02xLc|YKecjtsXZ5NO&QKfU1S<lI
z%kqXzr90WrPjO8E!0PQatW;ZR6`xci>j?%vn6-~G<8axzKi3L3uBPJM&Q`->M3*%V
zd*#ET_gU49co_MVpdH5N4XsEAcN#25{GW&wqz2LmwC5Ch57~ajs|sM|Tipc>h}iu>
zHDV^a@*{eEDn#zr+^9y*Kfl!sZ@7pTkq*-jGm>GuJJ;B8s%#_vj!`Z1ebDJkJqp_a
z+k&=T5rvTKxjVTb@~TGuZqM}QLJ8x7pGZP#!PJXkQ|;<kkBP-8M{7~1V7pqC?=8|D
zX}2s_^k;`&h3lRBHYg!x2<}MPfM67a(L*^iBHlm#<$r<bxI#DGX9)_&->fx_C!s!m
z&Ic?_2pyVeVllyEPqZrM=E*ka(>VEP7;Dg>{Ee8@&-d?9^LVqJFI@VRFB{{{lbglC
zR4?5?u-JsT2hqn!;aF=R<d>R=8)3&kra4$bKfT{@2uSpb!wR4|p33xE004$3DpY2N
zE5+Mt*Q2O}D_YKJ0-nzLE&g|h;hr66=0)r8IBlI7MD?BLHcX@mn=q(f(`5i%u#Cx*
zw#W58<qxjIDiFV6hnYCvW5eqYtPln<tL$?9wow2FZQG=IeLfgTIzPes_*I2ZE$@R_
zm##A<8dGN>vFh8XHE$Dthjisn=VCsSK)wj!g2~{G%GvVK+5W9As9oXiB)e&ExXW~q
z$?Ly87S<*o;$m9BuzK{8kd^5S0O3M$;pXK3Vgbzmqkn)YkK4O`yAUeN884DRB7-m-
ze#sN()VrGP?Ffw&Rqo0eIs7{Je<}xorF{!J%6|)LOcF*U?Nfl?tt8V7lO7(CIf6pE
zJE(yYV@{bhFb0vJ7n%lydLXDvp$ftx|7|A@nt?{7X0GtZszKGxSjfQ9pfX@S;Qg&H
ziU~!tVXM8BS^HOoEDj5eEcwa@iK&496~N6%B(DZ&>So?(7Smy(L4S_Mt(idgceTHe
z0Z&YL&h8pUsYL6#W3Q@hd?Yll+N`A4AI<BhsUk(oRD-S<%^+XV{rNQ-4L(8%M9#!Z
zb?uX||M)jvD$NgER?kM__d;R6sxMj_v(I@1vFc8<Pi045iZ(QaQl?8#$9Mcq^ZN8=
zfu8BD#85WI1KFc~1&MN(w;Ox1(>SB_QH<IZD8ioSkZ?i}g(;~i)NMm@blB{)EMX>5
zd?cI;ge6p_Wz?kGxOlX5-f;uCloX)q;rZyG$7atU=y@Ea1EN%K-`{WZ{0~Hnv$uN8
zSI45kayJ($LrGZuI%l$&GX5osy?swm=@iWKFLY|&uUWMfp><{!$<eD1H}*n{g1wyY
z(zn!DL}9*T1eOdWYkqohKyfpYRr~1ZV8Vn*!1G?39>-92B*<4Q^BS}L4)yqq(s`eb
z0|JK?0QO1#zM{ne&1dH;*%AJ#2p2O_&0N1PER*`=F)r)-?<>a<q`V^Of{jYRb`^=K
z@msNaUVH*c*ZPdh1x-`1qvPq%M8moDTF=rYe>CUF`g{bAk@O(8MP4__%^kgo-7}uZ
zs$ZB=Li>?SO8X~>jMbc{G7DzX7uti0^;#2Y`bu*bODhK}<&5OopIuRd>RH2}R(NoJ
zs@D$v_>7vWu-g1PjMc>>wWoLXrXRSP-ER_T1~nz$B`8#EP=fiWszCV18Ek)u0I}~5
zBYX;&vNCg}W5=saMg|+?%aHOwSXGSv1!tQMm|nt=Xom|pn!!9Tuk+x3Tt-v7KyOH!
zaqqJhfk60%waEj@$&cEIP#T0ZL6@-kHn)LANF{&-TwY&bb6HITntm7g>KmF-AQ4A5
zWW8Br&io6Y<Z*Ku;Ob=clxv$iS50>9C<7(}`BpD=wyCxNc+4tE$ZCaqqtRO<B)x_k
zrdma1O*W^hQ7#U9{83Oj3)%IdT#&jNUGTb-kdTefiI{VpSn_BxtLbO^9o?aN_6!i+
z6uh?NE52yWDQcZ}P#R)<nJqq9A5S?w&BMmaI;bfu<aH@trts5re*-Cw%FOOEw|{&#
z^<m$D6GLMDP{!l<(>DupAaq6nA|XK1)1}qpdv`5P=2xF>MzwN@q3@1s+mAEog(x?E
zkebkG%)=gEZ(=j3RQB^E@x^wd7GzToD0NVL4%;$$GSV<XGXWSD=*$jrCZ&w&n3aw|
zM3sN8#nErK@Qy*fq>oTTxR{+aZtCJ9BDcCA;)v^Knp;&};L9Dp_Eym{7W2Ch<xV`y
z&Cf>#E30GgwBByW1cc!5GzvTCLqm5Aj#_*kKMJ*u3?1+Bjbg{k*Mdx}Of)-J13J+#
zp(KFNsN^$EZ-R<Az$`#gJXMd7k?4hU{4_t}o}=7mNn0TEHmt+?;#w^uRM>$pziO;%
zwO3+p4$(9Apc(+uE{_l3=IUKFe~xG}WFI?ieMx-P6D;d>b3ywLlJ=uJN7DS~YRTlH
z3y7zsn)7nz$A_*R@^)NIeX`L9x3fGJFi+z1=Cl2BK0$ji%IFw)g-S4-!EAU{3q;Ob
zZ+DQM4niSc>a{E%MJdmjwh+C64$7^}HUt!`GXgA7?5%Q=VuXUA*<i)3YO9?i!}Dy;
z1$NgUM76q-PB)jEe(J9~ksP%2Ze;>n_Pfv+Ace|^px2se)CjpJ%L~d!f*uMeu-p@;
zyg5vK%_1bB{q%l+&adzV-a>=H*tF<bu+E_Hbuv>F?!hoJ6uaPC$Ssg7LP9)0OV?bZ
z%#kowJ+)V)HT_Cb!v}w2i~1<GV2XgDKyUwexxDR`qG=of8V^WqIFf8JFkdwJ8fSZa
zuq-tkk+iieKQ$>!)_za#YxGjJQK!O*FHwehBCD9$L>_Iy{G%GDb@fjJ+<JNkb_5j;
zFyRDyBh&cxR>L7^$7?5yt~cxz@NWWKm0MkI7=|=k(=&eV&ZfH_6Uzarr-GM{TFzGm
zKngDPz;EZFcPL59)KMslo(&9AnO>s^3C=Z4=E0*4g?a3J<@D=-uwe@*UnXU@SKjNH
zfzxYm<~VjoqCj?ZHJQVix0Z9_1-QrN-=_njHaVxfuq<c2Vun+nNTv%mDNW7a(CG%s
zajLgGfupLm*^vWW4=M}7{ZWWY2wG3h4%oCj)mkFp+H8m?Mr_j0_GW2D^mGeD(geSh
zE&K{qfiRy+WUC*6Hw@Or`Os>j9e&Si9|N%#hYaN-eLk-QhAZCOe$OA9b&u}q*c|7^
z{M=l;U|y^#W@NX2QaL=x7})Rqb{of_AI$NNjWjilc#4GsvrC7F+d9W!(%~K!ixpg<
z;|)>q^&YMZDzQB2e4EPER3h}qN?2_)0Z5@g(0iPW>;lqJhjqvzwSLC}l0DLk#;Bzs
zwhVgFVZPJX0QEZ`S+gaeK)~z2#ce)pfMh}<QqTfR4%lYa*grp3F*)LeeG`Jqk6w;2
z)g4T%!Be)>dLB+}b}X>gn-)1xu5LA_7>fIBx<H5)`!$o%GZS-C*o5JNfa>caHALEY
z`NZHe0%P|lCYLayd@>!$EP}J&n)-SyAlu#}4j@@t=sBT|`sl&AJs^7E{a&J*qZJ2)
zLWG(iiF6~Wt<GGDJVb6~X&yIZ8J!hz%E?EDr(av%)!0tKXz@E1@+PKugo}_==f$A>
z@(|fRw1?ueZh)e*<L}mO%1|P~IOMz0E@5%QuAm1$YU^xZ3?d2&XyH<+q+>9T#f*lM
z<A*bZM{K0-gxyE>Es_yt&(1H;+~1nkj%Jh$=(zQ#@{(JiA+{V#Fz+$GL|Iq)idhI%
z0!NBg_PMaA8|&amED{Y5kt}$(1CFnGrsq$D=d5>5SWC48SQ{`-^S|?yfwDdb?jNMn
z;eZD~I++6e7IOWH3Ekt@{Y?G(^>l4=-!+(qMIcE8S{LzaMR0{V#(bvKFTL3ULgRD6
zA=NG)Q_nl2=nV#I<UB)49#id`g@EG$p_)XO=F}+=RwPH?m4Y00h2d(qEH<X3CVEb*
z+bEeFfa)L+Td*U3;yZ5AKR8NZajL`F+{Q?)x@tvqJ=5sP#jMRH`@uz!tBz4oQ1G5g
zFgq^Jn~M}jr=1x=@j^p6;m$+$P5|4wSPUTbNpu8Qu$3pZW^yR%)@qX9iY^wA-mCC@
zyiI+a_qlceUtUk6(cL(Z;iI+Qx*X-uI!*lz%p|>PP5n$sS!x=Bv)_@ji19UKhx|Ko
z6uiEaI=(kHY$ml?)e1vraSQs^Ab?e=E=&*b!1FF#Dj_bffhgyU<7Qd^)vK|arjM$p
z((`0f%bE~L@38wo+eK=yI@ve<9MyCmj*z*xa5Gl~2~^eO6o0K*KMFpI48o>;WZat?
zP~&mY;w&AzUg>9V9mofePDOGx;VT~v5KLGQK4N^t*%8R8T6e!$KRAk_KU2L>Jt4v3
z^h4MVIk1r1nw^U;KOz*0S|`Ms?6{0(KJ{Zi6v{nCB*S2PFm0EgPW)h1gLXteOWSF8
zye3PK`Epr3GeTlD4(cG1HPTm@j6AXf#pEeG)>{dW6mRNG_ZK@ROEg&Lrpoj+?wp-(
zQ$z4shxP3NvnI%zjfNBZc)4gX-a{o2(bLNx!lPA8AGCk$sGB7gU^<#9s#>6sW+VT=
zs|eNrxDjeR!>n!BpYG_cFV-_ISaomiY@(N@_Sg4LPWCQ#M$0t4%G_8!<oF}OiK4HR
zjPhgdKMR8G2JSNDIt4pJ#a$Qa3O|o`V;!oNR_+ik>**Jn6_vIur`)%?Zt$A0GQvcv
zjGanrUD1A`nGtvXkB<v3`N7${mrpeKewBHYu6Lq`M<IvPs^qw*J;t+@HUnzF?B7c@
z=@V?%^BP@eZM<36pkb<-HqtEOPJFma^ff#*Yzbnz{la{lB|Xy^2WrxIu&kFLfwfjj
z)lOr0Bqkk$2hO~j;vn#`DwpPUcaaE#ZlF-iY1Wq|%~)mqEso<MC~m|Sml|~HPtBVw
zv$cfEOi(Mt#I<2Od0whIjhUtJ?5OKSppw*r>9oA@lX&%M!oN7Xhbl&W5IWOJ>-9Q=
z^3G%!5Z%k|C|Nv}S~w@9)|R(W%U(@6E54x0KY<xAO@t*}&L1%;oY3^a5@GEM39#P)
z!6MX$AblcQHW#+l^mZzmY<v72UqK|Pnx>Z38`y~F*?18?joXNeJ>|%r4wu5$-kYrK
z?tj62dNYEn3~DOIemTy19EoMCrKLHDXJg*;K4c%1#;`c;emKmvlBIM{y1=u+PL92*
z;$z%TM7AQ;Yjw^i8>YM`6J(6?+Mb)lAl#F)L`4&ZglD)_lu2Vs_wvpWRBnR!tMPuI
z#Z<Xgah2Wm3yxIl_9tjt?_I6LMbbXFMJgS39rS+iShgOvb#4U+1c9x-77!T0{DAwd
z-j?!9G4A`rO~7OUA^T{(@OokaRGB?bWYIMRReRSSfJjxV?hT=!%c1|>LuDnz>Bp3k
zVPqf=0xSMFtkH4%IiQB+4ZSzz;_R%n^R4pYqdr8TMEygy6NBzmr0{K~0JzYlP2PCR
zXdodRWQeiWY*)J%aib0ydC9ZDxlh4#GRA^Z#^)+BL%DOe)6}#)f`&9W(bGB2bQ;}B
z6|Z{<&g~)*t|V^iK080&@S%zjDXRl(wBMW#opcpWacMDWZJ~C&Onk|+5Bz-gW*F>7
zfplIEm{p`0^*_Nwp_ivV7#Jd`GDeGizWb!Y$sC3PI8i<rD2woNsi0MvoG6RkUP}zC
z6IkCaP|l3hT4@}KxtI3{QxYEo(GFY!0!kDlrF9^-t`mc>?`V2w#Uk1{AlR?1v;}A5
z?Oaerp+`?j9G6Khi?k}f&n3DTK+HDu;*9a;uq0|NxYCycs$f!;dFi+(4z!1r@6?gt
z4SShHzecp;qX=mEHle$DX~idzAIS_{B3XGIho^A;QXTfpC!Z938=dgLZ?L%S?ou03
zF={Emj@$4u+h0}~l$a0Fl|HUM+-k@hK52}X{dQ$`aG7>3`<NNx7==(K%5Z)&c#zU-
zlB*Dq{RR)6l@1>AHB$Jd&_NcvYa5Yp1(J5({Vq}e)y63TX5XYssapAir)l)|iS&C#
zE)0x6@nCi`n@1*vN_8vW%P!Ph0Br1x!&Y~a_%-iyw$r{G#6~$10LF47n*I3~aJcn7
z8UPN9sj7*da;CEWoq7@$6{Megv&@sou1MyEK_H^s*RBnuO~hpplwwl_T<r9&0C^|9
z$?%=rrCyQi7GU$80C4?{^MmE%tGzZar$V>|?-!M?l4cpvwwhIiAi;5jIe7eUcQP5&
zibpG6b{2Zw-7d{rw3CE!0Sd?4p8IQ1%He}aq>okIh~TtJ{nFCm1yD9>tmLjJo#i4i
zQ`I-Q<m&#|mfC*E`P3+`<q5DYZD~ES55<n$JFItK((s#)%dsrnXWBH&W+%QZ+=;gX
zXw*W6r6RfSYBXqi%~A@<q7Ctc74*)Ba>hpxZ>H&r&~Q^OMG^vr9C(BzVkE44ufKJL
zCyK;tpS%i!wSM%z{&aU-eW(_}kIFf{Vyb_?GBrHMWeXzIrLmY98_EcGzlY|OQ$7BI
ztmG5ORO9PHit{b`*zPA63n(x8!4-0t)!@6efMl5=Sl!7`fF9!8Jyk+H?Os{qL`Z7b
z*`vW#*8O0@Zw17kWuv@r<N+;+sNY;u%4(4z`ecV^rUSGfYr&yyJQH0YHp&b>GRt+#
z_>m@CFH=%h^K;-7Wb<!oNyb!eE4>XsrH<3RaK*@eU!01R#0huxBxN>ZR3we6c(&i|
zlLG)?=li-@ewjak;?g!3k%j|-V<}vG`HQWgRJa{^_!9H%QH_qz2Ytit$vN0Uzsdqu
z^hGcI4hRt;!~ScrZiPdfqQa&_I@mZ^Q?sm7T`#@-#t)kyL~#_v#4SSNET4t*paNrB
zg>8(xR)4>M9dWB2^#b_ArJI1kq)>JsFpl3VQxT7hmc7-@4P9mZEkCgq+!i1x3u&yi
z=w5$l<JkYgdZy+r^@j!fR9Pf>L5(~6pf~(A_T@RAlGkrm60=|4RBrLxKMvoRjzC_!
z+-nYWWV*Qm;Q=uTLMRwGI}J7zk2xEUS!=Z3cDYb0GIMhtbGbK~QZ-u~X=}EDBTqgh
zfkq+q87P*|V>j*}9nX_BJKtZ@@%#dK7?U|n(B~TL-+_rN<>~3U!|T_6(wjQn;`Z@U
zN=gn|`JO12)r>L*IJo;$cvxG!?$WPsZp`j(Z{h$|Dw$hWM{<Q2i4k6&Tv80Mt=$$C
zsvDgoo1Vt>YHcJ^nGq*7wVwfF#J3-1Gu_EMe6C6@=D2iEXH;qMfleX1G30v&OaPv9
z(P!B?kdfjI&64U#(9sIOVuU1wB~{yA;JR}!Zu+q2fQmeuKn$(8<<HY}0?N}I5N_uV
zf6?-V>v(q(5g3^`RJhgWw(-1&qn<v-(;G$zZ5RZ**>Nb|_ibQT6-|+D#N+eoi>q+r
zkX#DYf6gRF-M}6h7^%EZPE7ccGX#@g0f7(d=Q6jyz}I{G(s#R<@1<FuL9eOESNOrW
zcc<HZfxY+Rz!`XHApPmOng;7gx~_{5ouF@Pugv3wBELiWxbye?iW`w0?Xnl3B%~o#
zX25wY6Ft2e@Y@BlYqgu)BgseY{N_kd-Qc07tNGB~VTMsD@Q}Q;XY#haOCu$+QZ8-o
zJ9i}yYJd4F`Vq>w1M<<A$Vv$7NX80JNRhB<9~YJ15>=SY@OoQgum`jCfTB_*#C1K<
z{vn%;*RMXr50j^uzaKH$bq%~Y-un_iA_Obh393f?1{ncfO;T9<@+yW(lRansAolVz
zd8R`9sh|7sOTF-6l_Rx8+EhxztAG>2Pw_w&W&&cpKz7ugT965s3d?jqbx+mI%MkeW
zMoZgl553M#(P}qWKCvN1p|2X<_nrqOI904zkhr3C^Hr1chY3vD!Thry=53z@xzs2}
ziyeFx>ZBc?m+04^pZ5x!%&MPT$@>ooZA<~?d=;`W+vF6P6<!%cDlD`*kQm9K0x<-|
z_?>y0;lw=opsL;046n)q;tAme6zCG5yg$Feex1Eg^mGJxsNYf$4Lb^;wA}rJ$5eNC
zG+wvYP;iZ>1E^OgXfzdhmICVZZPH%mqu)kH^V+{|oqUmx?s@gte3I^V9+a>anSW__
zeJy+M>(A}&fyQSqprRiMQ<Di&IE97-O~kcR+%`W1E)S=KRU1^J4e)(Wdm`1&7(@^u
z@AOq^>5PUm1>ki(_HXcu<LIQvjjqK?NS(>y&!uzq8l#td-;iGKEy8*!dfpsrB1(!q
zCnPD;6L9NZ6>}D1mx0(SWuE{IXRZYTox^NW)12FlTS1vz_cxr@X;JBh=N``6WXJr(
zOC!!S$G%0DaxHK4N=aKOy`{`lr{i1Id%L_dlo%W%JuehZ*Nt{ucVPM#YGSxl470Hb
zs*+-gy`I8Q_@D9pyj?u)Nj-tt0%v|B%zJ2P-|a07;S0)()<uZhg=>tZ+*~>#lRg)=
z_G;W`l4-gkPj1>ZJ)G}Yp3;g8jr!lV`mYKkH|ci_A+e#&ZG0^XYYO#B9dp?;7=ciP
zd#EeF4jjQB=3cuw8JHr|9S;*6nOtY%bWLi+ODk8mo_m^N5?0Y=Biar2HCt(85fG&f
zLPu^7eGO7t*BBy(UJYV?fGd3*Far@>6C6##Og1=dVy*T@`%ypDwsxLsH++4(F}AY1
ztAL2{C^v>0u1W9Bw+bIX+42F7iD(iDoyn}CdotZLEv9DMU<GxLWKs<dzC#hQl?R_i
zr{Y|AKD(3mV%`F$@TkR!%v=!AOH9s`Z*YG9fT%D!h?^3BVbiiv^G4GGM@7VsPeLXx
zVn4}3sb`c);1lhuRm8FvoID_`bhymKcYO-iL-#f;!^{SNUP<u%kR~hZbJ;VYegPuF
z=mD}GTSq(>fSR&foc{c7g0ls!T?h1SC9x=ZFQ9bE<-AXxjBb4?DKZ(k(|SbVW&%RF
z0Psb{Ep%^E8t~|VX+4~~LR8A@eqJ#KkY8Bj$7t~{<HFjl7f0=yUv>r-*BWP%XNmvj
z>z*m!6VwJ^XE(%^i+mjCn_a<p^Krj!jO)5tO;v0JicnZn@(A@Et%91M)G0u=RX<>Q
z`$>ZGu=2qzlf#MFS9GljsoM8-bl!YiWzuRgxOL}~fO19WIiV7CH}Qj|lim9Mk(K0$
zjkioZ!^u!gxo%T8A&V9wlYOIpLqkI)4|=9T05b0VpW`Q45vQ2^{QTUn?oEpJFgqW<
z57~F!5`WiHv&sd~wySfs*E3+F(o-QEvqPTT74Y6+h|6Xo3`-Qy-j!yL`2|Nw(8#=s
zvUM=J5Dy`pI&7l@D$y4=F%Ljogp7#J=H}9I_*K!1N(#&Q<|HrvY=o@@?OJmQ5G~0E
zA@Hhl1=B2a15C(B=Q|Agc)J6scCUMyeETCE??Gy#Llk8l3t`J&rQP&xlSrCr-IDT~
zhE{v^v2eG@BCA?E<u_~Wy3jefi=5hO6QGdMCMN4HSV|EfLQ>Mp;0NVVc2F{C_l8+n
zU6lZI(&T^^o15gGzo;gjOu=F>2cUFMfbH%*U!e073q1m4_4a2a#5`6Tx#J^~yl;01
zpfU`AGvGpxfChtvFV4ftMv?uC4;iPD$>B<$XnX9blEa8^WJFQI+Sp4{OP6o}kalrm
zZsUfUAXUklR9K!p!R&6q7E^x<`~fru%3l#sOc>-r-3A{3B2Wnb*{{872J2W#QG&_<
zQb}VHKA=*(x(y9ApjKqS)*;(`B0qfBf=z%$mv8h5hNckUi!P8);YI+E0ufWQvWp#q
zHx8ZS6dGkZI}JJUZb@*Cocl)R!kqiLx@TKo7-b4)Y9x)Igemgb)J!0sMlxVwK0x_<
zV8d&<c7(!0a0j1W*uSm~N9bb!@q_j?T<5z3>=P2+0EO~2$x6dp^Y|0+`;b!hLg-?l
zMHr>u0>ohsfF5P|t8^ku+SqF6#(AuHXD}17{W2KDS#lJTB0xuOsX|f+v5yLX|Jhnx
zSA>-SB$gspxoTG*09WF0ccyy=@56U#ZS?e9b@aD&m1LR$;P&5|VXq?6vZl%xMsr~l
z)U`{%I7&0Z{n4KzP=T<Se5-iraChr5Y(pgE{v@$n`hOUF3t|hmt6NH3ixoLPorm<&
z+3<ChbieOi1DkV=A7!$hubTA&rsRW4X2h4=c%MJ{-D6zHppVlCn3d<k;@RTC8(=4+
zh>Hh-i~s7+*Oe~ahHfrZYp8xKCZPHL?Q6fCI82Jl;tFZv=J_;AjrllPNtt|RgxYF+
z$DbQ#iU2C}O<+0vu{fDWAETzVD5=xDBtbaUpBG2Mg8ynwJ9N!~Yf3Q1hY_P}DF${g
z?#giPwx9x5J(Z$-7S8dZ;2%BJ?iL0q_)2)tbmN2kcK>PFzM=rZFLWwh#Uo&!Q*$x4
z`JF$5p)+8TrBz6MvLgy;BAF>PrC|zqxqt-AU0C@9UQL8(MtnYNr(^}lIZ?c@anX5?
zOThM$rR8Qn$y^QjjR@KAiUA{)#o5*EnyXQy8MdK;EfqjZ?ijoZ^9R<D4C~DgL`F=6
zN#2XV?d;4p#D4!M3x<Kdt*SY{P9R%^M0;NMyVmzPDaE4a78*q3)o|3Z4=YKCEoG%O
z!@(%OkcG)03@x&cO;-JAM{gl0##08yS(XXoPc;q@3-aUtXf>A&j`5>?v9uCYjK37h
zvS;?AzzTD=x3n=^UPl-$3Y{v-bNE!%F&9rA46Cf|(;}rXb|V#Ow)^bkea#?|wENMT
zkA7)Xsrfvhl&s*bc$o_v>7AXO-kzR!Ke}tfM*5(l9>xO+v)qEO2j44;*|n#E!=b%P
zLXrBu@+<OahZxXNo898$W=NC4xx|zc>_sAKJ*}#F4v!h47)uJNp!r{sT&2a6?@pY~
z*K*uapkhhae^5G8!M~RY11b}+Q4E=J%$e;S0#wL7kt6KxH<$6~51)uMz5SU@#AOcE
zm|>zA@kKx>R4Y-3CNMy5Jwc*7PBgm?j8gXLt;F#TP*kh=aoD`iP;s<xM#Sw%?!xyG
zZBOafxi$zEPuZZa(K1f(DUsdY=r@YxB#s~v5_{Dci(d)j`#G>glP>4`<W8>RKR^mE
zFPY0?9IzNoYtsOd`<L#Yygnj<v#UQh4=@iY-=CMTJo`O?KjexCFK8Y=t9@dv_pvcP
zovahe`$A`s4%>v0k!zn=K6cM*JB4z41pMCRic;gZaic+y6D2|Y+T>(?cyf{l$mJgN
zSMJW@Z}8jw#R4=u1(f)&MvqQ^YC|bh6IIkoUqojJ0vREI<0pcO1A(JX?^GgCqkuBa
zfeAqSw9p>NlGxeV{iwXTxw+s4A~89@HrrSnn13lA12iQUdPv`a#8iakZb_{=wNj5(
ze}DZ~^-SsVr8pKxhT`Y>g|(#A%fIZ1fvV4uhSm7Bw|tG=50U;<7DxYLDkWO6)r5E8
z4MZGN+5s)kS%FT2ow6@H5<wjqlmt;X%k^|enqDQ}sNH^Rk{<knkXk-D4h9XP(CmC*
z+8agg@S_V!tJ(RnUk0$?h(3HEs+AA)caENXud~d-p?|If_FJe?S=ri>;jy021@}?{
z>ON4myfI7d>>*b$#wj1_dd0v-E-p#9!cVt<{F%{igWKO+ok^l?edvPZHCZGMbV8N3
z=a{heYB)9#t&asT;4oq?JqOqZo4!>aXnZXVXEaj*Xa_m0eTFc4^923jm)!(fHMwA}
z#pL8rUhR-Uw1G*I>WLu9p^ymK$Q%aXL=X~b_p0GG9reCFU&d&(TMYrYNdf1mVvTYI
z;IY4MOBWBu8Us|O1p5F99|$CXI?p~$n4Im-aRQ%iqlS9S`~#S<U*5iRs|1Aq%vq@W
zB)Cs(ZyC7M2>HuMQdGJZC{91FEZ9yTngVNeK8$>QGPS_zdHUS^jYh=GmpdWjOjyU(
zBl4&o=s;OAW{Q#k0g^Oj_C^(;Es_W&u%v!L=OYH$fqYtY>&=C6a5>v0ArW+q3&CUV
zS3=ayhM3lD0LRB<FH=XBv2DDS$FLjKe4({vcLw-D2k#+r`$t7uwG1YE*8uQc*Mc<W
z?D*A}k0ozixnuIn#^#mwN^6lgW`OPhbHT$t+34jEPNR$xo-aeoGT0b(SrZs_`;KpU
zQ|awvKH=O=<sR>JlFT?+Jl6Ge-AK_z9^p7f+}1av6jl@#a`%vT($6S3!S9*KD?mKg
zZcvHK5n4?c`&x2ab3ABJl94Q4`CR!@+{NW*#+@takE7RI?jj%c(#gxyxSlq1m*RuC
zRf(OQzLB!Q>en02m!6Olhc_|@Kl~R;SgJ~8*PwPfTEpj}gj>10hTgPLc6*N-V8<8a
zU0tAcVZf)Ai`R$wYSb4KKn=--8SRUvL`>BBQl^_y?PQR@v$XU8e(TM2Rls6=js%bn
zst=PP-6CsR2e!{)=@UYu`vrTR0?9eCtizlZkc@IWUAz+p(qK!&0LMz<&Il=pGNyW|
zi%<&V`0DmyBF$8bxbV$6isHQ6M?qbWx5sT9nlG7+W!1wP;f=_fQioOfO)w7mXQLzL
zw55m&$MjFy?rxuAqt|3tN50)?eVA@Sz`$4E|2B}8RY)jP(W^D@MO#-6RprE?vDe)Z
z?U2CNw1`=>O!p#&=OgT)W>>X@HRs2WZL2V%7Uf$XRZ2IFWY>B)lD3aRUF$<;$M3D$
z1Z1DSRYf_Nsn7#~=hFT@k7#M1%4+q|=<38m75jI+oXkm~P<JB}me`3TcWMj2#J)wh
zZF>vU)N5R<E<vr_HZr_P+-#mKJ~!L#dWKj1WPeA8k9xVDJP_hB)~p#MzL6t&T4>0m
zf=JWLrYw1Z3mF5!lMTQJ$7#PNce?G<hVK1<-G9_vmk}m3s0A_z6ZGjCl0;Qn;>`sx
zq?SZ$+vO?eKH9QoUk@HN3ZHSnIPSK#ip+DNj|<=3O4uvbk6)FS7b=wL#gQuuHJ)D6
zg?}?HC#EbNfjnVHMcYE1Rgd0nY3*H*Oi+qsN1-?T0egHoVYt;GRB=n9?{&4>;K;Lj
zmxD4HjG44bKBnqp)k!mv;dS@)!~&k1DC*Z%+dhumj&m_Kv0mePxVV>2NT<{@-o&{4
z%aCUlhya~yYE3BTt46tAC9496xtoDp=47Lz1-Iz#XcnBw1f+kmSS>U)Ev*mmoc!Lp
z@X&yx49K&(Z0@(;Pv<*5J)HzGs(bw2j37WyQKKD6%u}XzhQ@|CFSw5-mVo8p^Rn9C
z5x3&rB%z*%a9%`4{9C<@r}0%ad68bN<ON@*9Z?=5I7WF))CI97Y7g<whn9~-H&Znv
zVq{|IFGL+B-*w;L<`@Ku$J}en+E<(=yJXsHn^)f_n-^{P+-bevF1(lR`;l9~-4>a}
zBpo_B!oav0e0duf3TnO5aN}r=5fE}+$8UH*5!9z<Wc1a7L{d_E`PUN(jGoQhknBsB
zE9GIFJbK&G$>P~<SY4O5K?!LjGGD5lvb1=cR8k3$zIb4$^-Xck<li!;4~xc^oMibw
zRw*pab~^4^o~yZRz9c-@d_b;(ct=Y8V)Q5CWpo(>!J1cnwdSh!S}JGX<YOu71{wxB
zZ+#sHq9^I|9@pYSJ0&WE`i9~GW&;nQho}%bZ`8CS4eL>OMcV!}>KmtwT9LH9Rs?py
zm4{Pdi~unc5EN{9h^!<b?C$PPP?0%+>n#s@o1mR+bP3s5jjj#$e#Z@je1G<_nrE%e
zlGv_4!btl2$A;s8vXveLT-;gN9c{`(Ls6dMG8-hL9vN967@&W>`|%m&76WGa^?M1~
z7y2-=Pf0+1a?ZVJrn#xLtOLhkW#9LGW|zt)2v5}yedR>cgzqk7j5)Xs2^{n~ki|Dq
z!}}^X-~(}6zT&Tx2doojf$Gcz!{`H5K0|CRx3PD!DUFHgBS+fK#iSj?bQ&R5I$Ecl
z7{Y}vxY?v$=Si2AEE2r2iTUaQQU~t(?#+sTwPMxf?r@^KW>Hmt#?gbNAznYPjPbKY
z)6LdHV+Ea{A3buC#H{j0-m0`ttSoR%1qLH^B@?*sFGLimO~&<$sdi?L4CC1KXngk~
z)a?)o{X`mO{61<-o^ddK!=b!Sg|Zh6)SdJYR&RUry%QHGW;~9WK$<6QJYVL~bUy77
z)S08phsZ_am(LZ;jCy4T-=xub&h{?NQ7h4gkWeL+K}|{>?Ea+7?KQ9QNP3zl5I8)Y
z5v7p_RJ!M%*_&Y&?XS;mLmp7x6NdBttmT9E6Ra%iI6pI=^?aH55ZO+u((?9To|N8U
z>VVIWaz!zXa+w5HMVV2;b3-&|=$=)-b3f%;S_k#`f$e!RUr$$}z+N}_rdc3ZJ)L;?
zO#6_KVC2O`Jk2x9c+2Iep0v|B9q|HED>PU~&LDPYH{8afEewYEtOsg?vyw2GmB`)K
z-Ae1z@0vJmEF^~G=o89V99ec1Bj<H#BZu<(N@gD2<E$rz$qbOAK3^x}=&iUmH~pe}
zto=!c^DeikA1ArzEUd&nAxm@0yL4sUMmN6fMxN4X%5VcNgRB(Iej@$Bd^p~p9xZJD
z<j8NfQbRqNFDl=k!DwNWnms5DjaGQWnrgS2p5QSg2=_Q~NpRD|gK}aUZ=;Z3#|?1_
zgfQdHt_YG8v}_>Amh&Sfdp0#~Z8G-|jTzA~|7%nCstk`_s*!Uvw<mgkrcC5w?1a58
z%<v)-(iX@<NZ*Ra(TJjRGtB3Q1t*roP%$$dpx%Tdqi=EcZi{l+H;#tb>mBd^{6eN=
zBC<LZM{g^7sd2NA^KnY8XyU<_4hxPnx0+A}BdZ}Llb%}5nvI%*UE7JRh3C@(!piUB
zH=hjLGggOioi6|rV`y)tqGTfJ*W}LH7-$Bwh|p4d$S6YfF<ioNK}w3r2)V(M>2-89
zy;2Vadp}vUl8;}(u*%2-PjHw}X3(cF<!OHQL^lZ<QW0lI7}jr)K2o=&N>o2-)&73C
zNCl{V`q0DVq(GMeEoBvmO#w(l=|}?QOT-X>rTPhuVIjEHgQ0~VHjVpEUQ}Jn3~<7<
zi{1;t+fNbI#A3IMvc(20z8!Yq4;9?E`>u#_2MN0^hiWUjI8aYOg-m-rp|Kq0+b=J@
zhyh^^1lNchMhs^6`qJJY<fKp#J&r+7J&=#?yOzCY*lsij@vNaGAEHsV^eI05Zh0_a
zFImbxV-<u6RURbAD8l2DJ;g~gHOV>W_i(UDBHfMpL*~vIO|Bs5wo)M3U8RPwE95FO
z_f9bnM;bfSr|N?h@M&-=d||I5)3Q$ZF`}&7hQT4-z%ZZ}09+})NA$Y?I?7ZiBXHK+
zZ+~U(kEO*(6L1-q)F5(6FqSS{R<>m}me&ePNG0sbZ`D#A`Wl>!M<9kVuakePJ&@wh
z*ZaeV{xGpk$PVR5RuRLUQq?)*0g+-FUVtQB&iY%v*frRq5+E}0mGO4R8g4Hm-Yaa;
zg<E}EqPLkhugX!$>Rw@v{@YWx87+4V2zvj-3@%}hCZ`AY_3)X-@K&$nAmA^dnY{25
zbUN1V%DHA3W<@GmE<OfrAL)^KjrAFKWAmrf)UH5oGOnz0!m(M-xw+9L)XQK-&};LM
z+ID)FnP@#f3LjY($E#~@A;>Tb7*xTTjQvl&mxw72WL9*F_1eoBLHdOvjI1b$gD;WV
z(fyl~GYvb<6#i!)sf}Xg%G`owLl=LJmwMy>U%apH?=(i{<7I1yg=QZZjX{KftwcWK
z@C8f(DYv%+j9eoGvn`BfsSB#=LmG#3Q(-W%LX$qqhxIcis;3GR2)5i~@?UI`(|LQp
z!AxEr?Tzr|7>N)$&*41Nl`uZ)T6$iFqIg=hfHqUNSI-xV)h#1x%+75bzT4Iyu7X}$
z@MDvtS4hM9%Y$vU50{-Ohb31QYa87&=}MY%ML&#R9vM6QNcO5K7a`g9kTY5_Eo`rn
z??-%dihL=kuTZSqn(NRL5@;flz=RB6(4J0NU^-OSGFfb;22Q+B#zTPt9=DE`gGN7p
zpLtVY6MXr!xQBzUhzXt%NS0TH(p&e3%zE;>RsEQ?`)(_t6^Q3}R6pM3XIZF|j~6Gb
zW}Jf}8a$2{PvRGru`jTE?8m$i7HOsHO?i0$+$v*k_f&o<v$T4<0y-&=erA=QpKr`I
zlR9RTN?zW=%qHugI3v0IEG;^k?3h|=IP$w}V=Z?3GS*~aIvhOj&fDl3uf*IBW2fZm
zxVFT7O6uD-yvC^VrM5(EZJc2>30oi0iVrWP)IibW(OwCxl(&3!<cUE!Mcmu0w2OV+
zTE`=wcOyp_XMOgvNfjiReob8}zif!IxX&4>@I>LJtm)A$wvk2_d^|lC?*Kz~P%68;
zayt#q6Q>Klp90RQF|-OI9{k){50kRHSxG?vuInIH;vcMjfx&>j5Ie1QD6yEr<V-+=
z=JT1Jz7aiVjuC+XX>*HC68ZYt&cjX-JZx_)159=sOg05c6o;Q9y}sjTxSs^;?~#;I
z@By$qG(^#x*&ckP(P!P~+UR_PuBJrOzZ!!3e2%tr!IGW5RDO*N99-7uZ4R%E`7)dB
zF(c55RazMGhMFG7=zu|tdE+nlj~%I^2E;#@KA0{#16Y<OPaPJB{TS04le|=39Lxh=
z_UdasyGyKihL{ZG^*+&}7+RzOrC((R;xGvFUf1HZ!@AiyCM-(6^_4{)okXfRJ6<F-
zIqg^uDmaDM@o#0<pb^(!n~Xm+?3RVeh?AHwG8v<FClokPqf}<hNUqeQ{`UFT2aga~
z=IUh!c=qz--6HJ@N?{wSmB|sDe{_WqroUX(g9f0feg&Y1^&0cNhmDK18t?O}GUJ*x
zX(cfxfmlp@%=WK&38p7-AwZmE+w%R(>9~`099v4@p+EVsDt%|TCy<$2SjXbti;Bu?
zqLl3$Lmh;Lqb!bzsoVwc`|92x%;z9NiO&TDqL|7!xNNX7IMg|Gh4*9ZhnIw}KBcwg
zZ>ASTOw84eXqBD4FH33t?$Km1g%m8hcr|^rTD|wW->G_Cdets(H7;Z|Pb(hzx$fOS
zJcr3ECf{l*m`Lmw2Mb!7bw7kJ+*ZvmjxyBjUJt3{PPh!^J>AZaVYN=r&3vj%OJ5fq
zN(O@k{qu=MQkq<9q9B09arDjlY|$dV2;wm8QW;w#dKId#hwgOOZtCP15}2u}Lvf>j
zNT3i$rejHjzGu*KIc{riuur<n+h5LeLYgA7fR&1P6c2}p-tT#{*WR&w*JRnlGF{<P
z@@BS5o_o`4o5wP`ib&E$eQqMxrDbBJl*2-%WAeH(-F&hqrx4q*>!CONvtNHzfe?wb
z(xvpmoad9$lWEJ%hVqN;9ZwHOyta)#Z#k}cC6?<jmVC90Yq>hz_Chq;^91@dh%2L6
zot5cCJ(Msbm>}oglv`MU)@*rb=4{m3?2_kjK=Hixxn_}8WvME?zGSf~9diImE{z~|
z2%mJ@XB{%6C<s!Nw+85pgRS9rbygvKS+<6Zzq$wpt5*i}PF0Lh56z!Wqr=K9#FnU+
z{>CMdMm4L-68x*>WSih}=}YD{Sm|Hy(JbSg9iibQRO6^Ji4;z(9St4<{zTNl)v@u%
zyxhP32|*H%2JcJvN&ZtA{T9_yzMV9c2rT%MR`kwR7`J|m{|x#sszo)>h~~zltPd&K
z3Z}E)LjLwv479g$M=F0m^ch(QJl&S~11qez^2)SGJn^y=zZ(6_6<i+U8QLK7`=!c3
zBbL9UVOY-LIEn#==-<zzii!tU8&P{i$p3n%2&^)dh%#-6O>Bh)=&AZ59jxE)Fpdr0
z;aGbRA?DXhImbgJNXmEJO}Fz4v59!<em|4UrxbcM<EH(aKRp>l0TaTf?4RPVt1>D5
zNq#lv_lU=&gLlYXc*PR+>!sRDKqH(T*Pm+hNTiL{SK|KZC?jfcwWf52VeFrt!~q>1
zOfgHflbo%9WyO#v{;SbZ$KZ0R8$5fT-!G*{Eu!30!cRKWNWbOZ9sc3>GoO($f~)&v
zE+3}+>B;P8pu=&NXkJc%p0ZJCp!&T+a%sUkY<_%?LiMX7_ZdJV^yV%`zk>edw0a})
zr=v8b!PV;H>)-zLB&G!D@a+3MNKN^LvHmIj)W3(HN)TNB|EJvlpK`xUMD{aCHhMU`
zFL{M&Z>0gtdhMUdBLb@_2c{FAHRK)bZ<_;y_3R-mh~2~*_RzUg4M*7K|6Q@O(Aj==
zYv=gy+6T~8=gT(zcilvZd;`Dn504^&+MnMGHUPR{cQ~h{|4iRtQSiK1JJ=?Fzo5cI
z1Q&I6(AXgVyRjMgW`F3Hb^84s3s^$7NFj-TOdABL%O9-d_XcZZ|E}p10XJsfp_ux6
zS>IFe#o-RMhS;A+Q(+oImkR3pxj*X~f&?!Cexp?H{P<6Eqr4NrlMW7n9@?K9V|qbb
zw~%fl`R_@#z}_mPTA>#FU6PNW3qOZ=#P~n=h1%5p!Ee5Qmi#j^S8yBgNM^S`rX6%M
zJJe9VdD`&skIi~!1HQm=hvW75^97mDw+F!f^M8H#+wOyrB(n#<VK?bw{9{|dSM8XY
zg`GUGe_udE0D6sLQ%A$UG3ep~rogsEF!kZzL5To^GGfV#FZZu!pvERE#*qKtqfNnW
zvC)cmV*kAXJ?I(fe#=#0zJ7g2_7eKVw-3u*e|7|K_*e4awM&ET1^(S8>^|7`ul)Yr
z#2a1*x*uTSuu}e=Zb3-k(T_U@MgP50j1L%Fqi+&rK7THYc?Z5&nU*c1@<9Sf3lWfu
zNNi_ZoP<8NogY4egnupVvt8bQ1_;^=9cv8QbzE3X$Finfz9eC<R*i%~$KSiizn%df
z{GsObJ!Co+o&q{EspzUO{x<$^3-|Fr3+HF7%eWPg+MgDHAPhTTRDY62{%->OdomTK
zHFUaskFXI+2aBU8_WQ8Jze(}uu_7<PPy7QS!==gCp_3wz(vAG@qyBhHFsxt@m-FV<
zw3ZVfpUSN2m+mF;J6X*RM`Qo%x4%Ed3StFw;)jLs6XHl{-;RZ^SN$81ze9xze)7q?
zC!Y$F9(<yZh`IT%eE|LJ?F2^ZU|7#S&sGldQUeG-#Or(Aa7CE(75!Uu|E}r620e06
zL;NUp0FcX28u!OV$Hm1lY2JX0fbqrQ>T@9c-3Mgn5(Z)Xo=^6%A3uEmpBeDGO;`zF
zO#SB?zbQ0Zey=<H`EjA2*t(e<Na+OkE`q#z0>|)^rywsyf$0R&o<^6tHKx&%MJlv&
z1kN{;DvDfQDZKXioEBf1CU_0Xc;=mRHk&}w#%xM4iCseLqZKo?Yyzi;<IXqbM=mQ5
z8g{A%*ne_p;e%})PxDmv@3jACDrCXXAMRRqsn-E@<|5Uin9XvRqcwJbGYdMNE7RpK
zQqBoPCE-)`yjaCIo2{?hJR6fDjeke#`DBiOwRztcm4J<mPT2&2EbS)bdHO)!l(`_)
zF2sx<G_X1l76G{zu+vc$q`ggc-|s+Vw^<a?tb7%)0z^|NEKurzbJ&5keDa@J^tW9&
zj^~eh<JzOhpRUZi?QzXB3t#X1A!!tA)tcx3WN$P8xH-G8nkIeLZ8xv|aSICJVD5>9
zO%QRKl0BnAwcjpUX57m37)W52C&kQPq=x_g(K;iJK}{yz<5V+tiiQ!j!GTP~2QK`7
zjtv;0S+M+<17qHm16<4d2R2b>Yefa=;PAjJ^~mem)@9KLalG)Mko${t_d{tyF7vpR
zp%e{}(4YapXEr_7`xb{Lm~4w&)gXF&RE6tU)_f>)vEPX?wD4i$)zk7Fi>x3%kn5F4
znNH?&ES@aa7oMweJ=rpj!TEmC<vKvW)s_4NIN1MvA^LPd9i)RZwqnMLC7E7DxTPGB
zlZr`BX7B<cJzB4p`1hJs^jtToH^=irYfQ()^xS@OY1X|->Qm-;yYr&$$ZuV~#r0(6
z!^e3U+AWb$A7>6wu)-<20ZLPNmXyI*M*~O?;?E)u+aKZ$r!yAD%iZDZ0JBXgcR{ok
z>39v`-ey^^l@YclcqP(ajIX`;p92>45Ugja9wA9f+?&gj*eQK4U7#Sv^f?5V;|H>M
zLWbAv^h&#C)7uYmj2a4MZMUw3k6-yGy&LMlem2SLyq^gqjik~&FU=a()7{2{`9SF}
z+YhwExo?<E_eFX){c?Nob!<dOvm>yFNt|ZMSzFJ4NH+r+6e@gOhiJm9!vWSvq7f02
zPL<8I6l*;Hys>{4^*<ll(ZI^JqH{SW@@I-?(VdAS{umqz;`-KI#EcQ{XLC+=fQ67a
z@}bEI9hHdC(ICf->lRrgl#;@Dw%)e83GsDjIoVb_OT!9Q?9&1u<VJIia)ZyHN=Kz!
z;C^v<IK;Ei3)Zp~gR=*?QNC9}$5A1et%sm-1EAxHjz|g$vF%qL{&v8BZzR9|iv@Mf
z{TDuMFfZmr3b8LKU`nK_WnpRs(D*!X`ilkFQl^u*oi@oxddRn5D*D6F0@HzP=NzR?
zOd&=6gH}=<9qlgTmI#$tgYA7OOuI#r8jC5GSU*}EMUtSW_fLE^ltKD@$QcM`Mg!DD
z{*cqj)|cm5VhAorLp*s-d-G#{M42XFGm;vH`Ck@v3A)*xGl$K?D%m|0L$(_ugQ?=u
z)_d4s4yvQ9K2^-Stdqokymt>H11OCl4i;5AY(7^xKvt{WZN1u!6;DGa;Ujnn3?=>Z
zdYhL;dM%A%j|69&zxz{cJ&{fJI{1v=EsJP8M9#80m^2XvvZoHQ7AB`V)8nA_dqWg$
zl;cy`BDsGBH__jH{;z`tY;rw0*lEzHG4u=tHOM&0sQS#fP0+E3o(_UKg@`u`t-3-t
z$10SEsE-7`sa7DzsjP2|<w%x_Zs5*zrY>n<I+l9-O?5|-vZnDl@<{{bHZ-xKs5gW!
zU$w4jkI%M7aG?=%%RO@67j@aG>>12h@Ho?L*lpaF`@-Vy-f?tatu;!Z0f5Yuy%2$Q
z<$Hzx*LeL~9nw<3x{h0#8Q~eV*uD`nTF@I2x_T2^LGV%;$oa(qcvwn^23s7;J10ee
z)PPfO+ZxMxcYV-9%I>tQGjZ=r2G{_o_H!G~e}t|AB@dd%9G45#CXq;YRTpk=q>o_W
z?%iJtj#*bLR^>HYD*P6((%)L%8GGOG=Jsl@o463*b-E2VVikp?WrxFNh3=zWE&5@s
z6ql5RpiZK?X8pI2z`unO16-uE-0$P1nxoN_Qbu4;qme7ITkV$|6^%$1p&qi-BdHOk
zSW!Y%t68K5#)d*go>A4$+rL?~cg}uF*D24@&!$Sq<MJ86C5!!>apVu-D^orY`|lPF
z7&sqnU^4bUVB*3i1@jT$P<JWYns1C|kq^PV)Z700frEOgvr;)nQX0VT!xI(E&5JjY
zv$2bf$x%_hv}IfF2URUgxG|k7{?84K>aQv4o@1B|;=3POto^YJsgpsw?#-x$ND#i=
zxzR|v;6_~?%;6Qt$;zfX|3KOV)u>v+{7$=^(s5~OZDNnQB%X%wv5*~L{qI&gKwFLW
z0Jv8=WFa`rTAZD?(F^yoRCw)Hg8Bt!9YUG~>BiFICI8!@{GZ42Ob$39SoRA~wy7w+
z*;rL)#H1bn9}`>eDXcOLF#Y}B(}MqV`cshudj{eKdOG7Emrm`pJDU{f_SqNh@Hn$r
z11xW9@Wy3<xrNKf*sOno1^;DRv4QS#J|8<L6or|A?aOf3o7a_$d_<zb2sM3q0E~V(
z%)jR^z`C*u^#H;M9*fP?S*FnBHiPW}y3^-vpMfye^U7Benk}xi-UEVXv)G6AK15=Z
zqD3cCk6grAns(SVZYX!m0jiW8I0G6}M->07J7J)Knw346AOe8t!c|T71s;(+z!5#<
zDT(ke<S^>Rbh*1ZGY0BzlB1RLkAYrN6sgzs<2fy_KkmrIcNIlJ>a%$_Yt<M3)xju1
z)&d8fDzu%r|GCA-Btz%-yx!w<(xDWdM1YJRz!1JR5O`JAaynD5=~$nQ-s~Wg$ih4#
zc$Nwz7<)<w2oK>SQ^69*`+!e@E}m%`%6E18Gxm9&Okeeg5F>CHhvGmOMee}??Eij2
zQZV2#*UUcqIY72C#U=+porrDM!G?-q{$>BI%U;Xb%5tP|0w`8_&sK&=>aGNyV|>}M
z0AABp6L@|)&PxH~05r|HWV+fPf7s7h-U|#PRNu=E7+z^GmxlxlTC<0N?KK8PsJ>VW
zUy9W@4CmaBhG6U`fX1I>S^W=WaIBTp5+VuWd*2C0%@|-q^92V3_0oV_bGY=5RPqbZ
zd;@bv7ApFu3CwhUGyRrPND%%%zxUq<sH^M-(PIMOEyaPni5w{m%Gae@wK5E<g^S4W
zpXw9@PgJ39h{Mkrwb#R`X8tkw`tHiR^~>m#8zLeN$K$fAKSEg(fE|<ryEw|l@r2^g
z0`S`m;Hl%b1EtX5(}DZh2k=+9&Idm>#YwJ7rZV2nnAt6d^CZ6=5jYAnyZVH}dY;B=
z_rpLQ%nkjSZA?dClvzXzzhpgqX9VEpYqh-gYrUE#Y1A3+hrybPoSo%t^RlchMr*fw
zZFe$xGV$yyXpszqgzjelEcJgrECxaZ9cPtfMlFOcHz+AL+%HxW$3aODWu1;)A2p3C
zlC;YlfjChR$8g05o&|T6Dw7c_0cEoD-2D80pw3AR6vrjSi*%dnbZj~h4zG_!h*^nr
z0I2`{PYhVu9LaFt-T<G1W9e0)5zw;}vcH*gTCfy*UuQMP0D#xLX8q5?!W!h#1$cjQ
z1@XPm2mAJFAhFdm>`F}6fR(=eM+{murjrsR&a}D?V-m2|*#oR?so4=JJP*tN*I5e7
z1{?<?EL4M?$i-s-6U0mqBM>lr;JhnB<wH9NT<0}&QOp;Z6t%X?lCdd>6v*(z&dod1
z)jjw52wlcSR+4~{p<D|05S4|bUWRVd+oXL4DyF(64RB@M0~1V&wNio;Fq2cb+CvNU
zT3l&|8lY~`<o0T!z7W_>@fE?k4R^v6t57;Y&c<u@V%sxy+X7fNeHwpM`=*nx+VorX
zu003;7qjpYSlN2&c!MeOhe#Cn#L-M@OMtX4vCr!cUj#`dTSMS%M*#_E;z&||mJTwU
zjw^Ty)Wc1I{dPE7M+8yCdG`CYTccTi^he>%+-KlSBAq=9&R8$~3%~)IxST@2X*EH6
zB~C>_`fS|~5hIzqH9O{H(nktQz*y<P0_;MPLCODzwzmw6x?TH51qK;<01=QHN=iby
zr9q@akS;;#5E&4GfdMH=73q-fQb6f$kVcUXDH%XYYF~4&weI(Q?)4mdf7;&$4`=2-
zSDoiC&hrv@9I*$B2q_3g-$m88Ru_USi6&clno^O2mTS$<0zv%G135OZ7LzHBUc3?|
zP{dw&`xNZ5Z(PpzT^o+TBDeqi$F=EH+5gW5yU)qutHF<WBy{}NBvLLQPj*tIMEsgY
zvqQyI6=%UtT@D|s_q1n$dToyP)~FQj7MC~uVb#G~G=r|nL#IG8og~5+Mh8oP-=gHu
zUdTCZ)60KUZRd~4PowC>M_KKZNzyjP^80~-b>NmY)25|Vg)I3B$d>^d`};>1;oq_T
z?-xNJkz%Jd#`oZ9uv%ck@g!h95j8J1{#dj<*Z3Vu2h$M{NHIO>WzP^=f~t8bWXc(l
z?JrNKilUq3+E%>hUrw<?yBvE=aCC5-%j?%}>ERQIyDV@DflKcajOtx}eAaFDf0<B?
z*Bcv0h*$4gg2vgyx53@FM79Wl9)U9zRtnz)Qqv{S>Q(iK5|tQY+G>!Ul`c4=roPua
zU8cfY3u%yf&pvQG5B6_U;U}<iT$4~E{1tUz<3rLlVHpAfzMHH)uOi3(#UTG3Z{bKW
zX7J0RMObw8D(A$I<jnW}{QR)r!xHmen-wu`YX|dO*s~^PGD8o@MbkoA0SLcKSzrl`
z1DE4k7&F_238IIPtz&Bwh8Dt$hEQ<hu}j+CekBMIe(<Hts&~SOI@vq;`lW<f8q|zS
z8lV$^x}eDg!JHcZ77t_*VlMrOi(2X}KLaEWP&LUu=3Wy9{_aA~-^K@K9?tAv94nAY
z+oG6Oh>LV{BQ|h?sM;njLFzUDqy4%Gl4rp=MWO$WDCBN{-ixMHyYD*)LW;)6BH7T6
zPH1^hmi3LxV7H(KQBF?IjpWuy+^bf?o~c+pyewm3hjISi*t=^W&(RoSgz`6qIwta%
z+-6gI!*4D@lXTdDi9=D|S_PrD{jgW?KZ<P?$}6<D#N0AD4bqcz#(Se+7Jsv(e-ADi
zL;khz;-eUuOgUnK%~&1gPq`Z!zYg^R2H#4u*FnN4*B}08JGZ@K%Z(_VERO#CozRi&
zAKTgiLXV|m>Ysok5R+j^$3BBBPzKLRX-VxV8q2R)-UQBC%>)w;izdHTAwt9s;9GJ5
z^0AWv=aSb1yAr+Odj#2kZ^>YAn4dgXCBQCt5~5uO&gbq<-{(8uR%?w7Gkcj5Nx1wj
zPL^Zb?*?f8O<4X7&ES_{KZjpQFA+%thBX>bB<aC_EiWigX(+5Rx1%BQm@Ra}!Aaqt
z8R;Cz;JUw`4Nr9AW1-Bl?)(9!;3Zq-)a(ECw6YDDPurF3IQ0nvJf)VCAl2Vo@qe%d
z85nf-Y%$O|0i_JU#s4K!{k@qR0I?mN=$+vhLyHal0%B{ddYXi=%UVbH-%<SU7agJ)
zKCAIwYMKg$1?y4vO!_~e@L%5vlfw{a&;+wMS$+r@ayBs2LK+;K5QXTe|7H~f!GkxE
zs=_0opz2F8nogX#J(M^BltJW&STus@?*X#Sen1~_fO#9to^dz&uLX`oVAzg1@34ah
z#@K*}NC3y5vB%l~CpZo<v1@h)kU2N_%uO#Zj^}gCB&n4DIpVA^8StwX1#L-LVFF07
z===YoiPGM@xiRz6F>z%$!(t9#Io}PG`di|EF25F9u>KG>8TEZ)Y}Z`VK-ka$WL&&R
zI|v>9;OwQz7hJ3Pd#wL{u>|P;G{g-4P<u7RVZ49|a8H6;d=E?B6EVu9noFd=6m?tt
z=g2Mq<WGNGf?&f}3;`W50T09mK*$ZKQ~>b9>33P>|MLy9*u5Cq-eZT|Fd-8XYqAc4
zdOSdBqYv2Sj$_CEX4wC|MUh6B_2jrj7Xvwm1=bExXh|_0tPis@{@0jek-LEpj^<Wx
z=wU=U)dnc@v%+d24D}UxL=J|6F6-Ul|D-}?>%cIf9*nf_Fm1kZ9BFBX)cr=VRXFZQ
zY*^I7!BG5w4@LgZ+#Aje-l7QkAS#(vusrxu3uOQ#N|f0W-9M(F4YR~egBV=2!10p8
zxc&eTC+oBw8~=C&MT`&<X8GHW9jLbTyWtfM5fb4Q4^mnF=f3(s56ZcTKpZUlZ2->L
zFIY_ae+~jN3`6^qMa^xavZF<hrO6%w@saF#xaU8%JTwt}s~f&=owO1?sjGh~Um8$E
zTw(bt|8o(-$S|8G=xa(E*i~yy>}^Frd2Zu%kN(G$E&>2V!7E2vn_sZJI2dz~t2Z8g
zK)m$(?{)RB7tHct+7<bIs{vs;H+!sT1Y~i=X||!be~hpx_|`VIpNmH~yrddV-J6~P
z`J+iLmEj*lq6qeSI8@p7xx7wy%2_(J7zn7)FCF6l*v=>nIa~*ko=wk=ekFQuC{a^~
zeMmHH>>vDO7!>R_veuu2nqcF2&Qn-T0E}yuD5mm{5q1N|DSl$+tM7W_d+4?9(^0^7
zo?dtN{l}1yVGdGe>c$y-C3@FiFPKY!^vt|XzWy(f{y*m+D~uPj$H{-Yy@-%tm^&A<
zkWfk!n}=)0{9{P||6g=pf8>I4zJ3h#4FYZ0FWKlhPz1?}krGqIJwv5$uLb=6sneS#
z={^1O@px;x^b62!g$F?A&ajx^gG6QQJ>`Yz5zRdjwxL)5e)xaO^-Di_VzWUT)v9$g
zv?CbYkU6H+|MK_;`;RZ)i68*Js?8L0yy&8J?fJ6X#0`o>-(j%UpW|IVT!I_105qnv
ze#U@ekHZPW6iQiDBtJz21~p)qW1H@1;#{B4A^^fv1GF&%)yw(@@^)Up+QeU7p7EJc
zJ)bwJdBU{Ya1gCuYHR>FO~yh1c?%Ryox2Z2K?HgPftDmm8iFl-jrGfaP8>L9j>)O{
zt^%+d3KzaGnd94jEbs)aXKGS>_%RH^4PH|*b9Y?fa&SSqDk$}dD!}BrK8qn#wty(e
zi(&LXkpr~p+p^|Jz9yrlKbzbKwYC&qpumZXR>VowWS6_Vfw}g_^At>YX}bL5$Z1rO
z`5Qo25Y;Kk?)KroTX)rV1@t5i*b3Iyk`(?O>H;x9{Nxif>6$B-ymEJ_&h6NVY!WH4
zWKmaMFj6A&cv2!527senLgWXKb`JIj1c!S-1|4M>tv9|gRi4rCXS4Vz1#(>zOx);Y
z+CcVD7#T|rfCZT3_7tRTV~P#`0iGBc2gDr}rv${MG7+0(1Prr9uzOwpy|3Y_;Aw&&
zUHw-X;CW75!VIw=@odgM8`BT~x71@8l{b7Ab77KoUrD>*VHjMad_(?M@>bawO#pCN
z0OcTj!NfP<IV{Z%h=R(ToE%lqead#e>7{OtDp@^1H>SiPcNZQX0C0I90Q+3f5ChvC
zdx_}Jfqj0uZ_%@j(N3r&Kli4o&+6O#PAZEW!q%!RL&Od+mOtAm4F1Rz^%UITVMl~G
z!vH|2qR6=esAJkc`tI7FXyMP9`ylZhXxIMUhh-N32=EtuFLoR4Pr!9EU8q*+lZD`T
zZbRP6MML0K5(@eb`faSvdJew@s(^GF^evep%Hok$8b}3u4GhiL>{H!C)q!*cN|TZ&
zZ<w3(6podeh$7)PIhRjqIpeq4bum#Rg7xg*@D{lM4ln#@s`BTs1r6MJl07vzw^ACo
z@kZ_P{B+D?1C-IXLvI0Bzkt;P7ozO0B>VKI`yXYWodO)FQAaW0dok;~RVv^#&Cjv`
z&mg|M*XCDNzm{C%D0qv|M(D|iEa)y`3ku%C0)Jw?*!egGX?s)76h$;8SAq<~s?Hu_
z&+xPEV1O#Zj4WX`u^cA=;U{)8ycqacfb$jCGZ29rlb>kr+gq|z)z2%KJp^v5&($x~
z9WBsdVNnEy<}E9HMGs4p2PPUAdaP_Z^UKV>5J2E)xyua6Uv=$a5<i<kEfR+9xK+*)
z_Q++%%K70q67h;(XdBDp_ZtqKC03V2Q;+b03{a(ZF6lByh%8$?r6rO6^!hqjVqY`+
zWd0pZ{}#-#{~D)T{~D(lnLj;H2{imEDE8VjZ`F`)aB%vSG*H+21(Hq=$_?YeHcILz
zn}lHDWe@qRr`bc8p)^`Dr#U_^L3yrjt&7=ITL<U>^qP7PtoR3je$rq>Lq0D>aAQYE
zz*L3+z0DF2#L0mg0FpfVfo|FT8$X7M)))8gW1tM}gJk;)xfk5FwHkNP!H9bsG@*(G
z+dVg20{YnZ_g9tI>kj}wGawAhgMVugTm$@C0$^^0%%;DmxHfUlRNIS+yOb1QB!<FA
z)p<`z_wX*j$&``g9OFZu9ql^02Gqv!s4Yjp2$mCpd(IIqZ@CSJ46c&QIu1ky<^7~5
z!D{-yG&bZi_#2%D6Tq6&M@2;we^3Xji9U>9HJ-4QNh@6ndpHf8ttOh1J3^;G-7zga
z2XvW8IuA4-w1_ExTJ3l0LBWihg#!?zzg&S9IM)ZaV1lsFXW&5lygy!P)AAq}Xmxop
zpc*Raim^uz&!Bu^CTNTQ=0Jo5x(C_vyZP#37_lx3m!IH<(xC0!fs8}61-OeSTG1iI
zrBQI$1i%YIOpgKOc)P{{^a1TsSOc9CHI}cD;iqe&e<Jg>K;`6W5Fsyry|X3EYr(k&
zR5I3Fl0}5nOJ2SEYWGM7@9dg6HQpKSa6Z_I`Hlp9+^AuzGH@(-9y0W7Gp9Evku7ai
zVu<F+4)Oow3UnX~f3=H>#Yg8c+dgxmevJKyZPSR40(B@>Y$g{N*ivEHip?ToYgdqo
zQd4_HLa)Y6<|1tY--M6jrL(EFMZ#De^E3QEpVksj^gW@YZSq>_AzN<6uC{@YdtE1G
zxD$MxI3B1c_d&Bz4O*b!F5<t<fLFk639#`!zDxZcQ7BVN1G(+3;Vi&q(3Zm;p_B@$
zlnpliR~O!`#!N19*McmMz;!gqbP}*y%Yso{SJp5^C~>gdlVFvY=J++(99#yMWwRxL
z;EZ5#2P~hvZDf-%!Ytk~!dqA&KJCjK<Xs1%<TqwiSTQ({RpaPCu9jcmZ};H{!#{uR
z9CEO8iWTWuNt=K;xdKkVI8fY|x7gYY8X!0UhJImyD5xjRdyW%Cjq0a(3Xj}Y3XlcC
z{OrGXi8g|~iBLCdeXL5TahST2OsVmMallfyk|#+4YngTrd+ml-Yz|G99?Vka>f94)
zPg+K*JYM;>J>7ScQ*N4^h?yKEkfPTeYAoIu1$!9M(F^l}SDb@90Na(7+T&MC_4WJt
zaa7Rk8K9uPeiidkTakV~ehqzubVK>rTux%tjfC<Z&e6w5V5YtPKV}-Sk!S5+TcykF
z8ZGotl7!){>(zJmnBgBxDaVZXNmd-hd<C)0;^URpOMNf1gpL(cO2E_Ho<6_R<nsu9
z3zY!UlKi*W!%tXZsq29yp=;}U8>HhJO1hOBl1&oNr!Zbw@37E{VPB|sZ`NiH0h`5R
zazYkAC^LCm$H>h&R-|GYa4%5H;WONYmIjF{(X;A&!X`0qKaU*|qt@xZ9S)X%HC%<b
zA-?4ra~!2ti7Ivs9F?;u3>D_*HwL}}{EzZt)D8Zyn;{P7Mtu7_b$jDTc8a6^wZpw1
zeD;UjXWuR_&IaqCt@2QYwkFFSQb-xPBuI{~J#`)gg&O`(miOM9O9TWzZr`f@mq&I;
z=RfEqn`9n`i2^;U(HY<0-&^$K)#{IX&mTlPF%v&ni|a35OFX`u<sm(9;IR$O5~6$Q
zkKbFIQ1@)VOFQ{Rw#LISiAh>^ZFyVPqY(7cKyboWyUd6ieTg69DNw|$_d}2I<bB{A
zvTs_q&@#v@Co%b0Z6AW{C|U&sIf_pIT@X_*-{no9mHYlK;3SPc=bv1L_J%Kt2aoU|
zHm6a8P^BPIqJgztxihDk-#Bl=neGb2tK01Ghnb+|vGG{5m38gHgwS0`rgMV(1&gIP
zad_FcI_OXykAm|_*Md_;U{0Wv{QZy~p1Hm%+Nr%9cKekuQ_fI^73qS(%Gqa)&Fzv<
z++j9^X;uLB8o^zntzZOIgwMt~aR}-64vc<{ey=bRdTjanmT=n^N7y8^xibI|HYC|^
zuO8eDNZpJ3$z9!xVU|~1g6%o}ZM#U9@FPXZwLB1|gn8MO%=3wAevemn+MryuJwtvN
z{XOQToBzaxX11<EUwx1>W!!W9knPLLLBMg}1u?;vV?Cs{(#O*-0XKzI?Z;C6Dq1J*
zR)}YBc?wC-yd=qkZmv`H=G}rPzrV$&?VmhS%F!;{_SlMAUY5?f()@Z<_vtDLm+i}Q
zG5=2UjeDNOQkgb2<!`LJ-sfM;3%)4cmU$*s9%jhHtw>*6ve!<PE{Y^NbzXQBME>){
zJpjER$6_a25XcTwqzI!%pUY~}ta!G@{G=22Pz4lV)HHTE1B&jk?GrpCt_|oMI-vhp
z?f(6^@$vD%Y?40;=Jbf<mK3PXbw_GGmTE^V_L94R)w-P+eeW)`?dBvF3IEDhWGzuc
zuq6<Qg|3i<9SkKy!mHZ<@d6B&W&uO?pCs)vH=^E3$EmV3UR&Z91TpN0-|(d89Ba7;
zqBmrmC|+Q<hDpFOon`p_NYFlRw;)=6zpOreV<>`VCubf`ADCqkw>~52JGzB(V3yhk
zx&J$IHnol$Btzr~hHtSGvcW8-l&2%GIfqV<)C*XmC&>TGXviB)T<_Of*EhRoV5~H6
z=AC&fqgqZ60*P+cXvmWCJ<#`lmOQu-x~qjiGW+<8wIM1z|K;z24MTnwQVf5gSob^_
z)xcFu3o#+-d%5}c!?%c<JBiFOB3ySU^y`pe`+DnD*ffv(k}pQ{{LDEsgVbN$r_Gt)
zz}0u_W&c7#|Ca25cmD<#I@Ll(>7A1_2Xql!QjK{0fzR;QTVo}U2f5hdx%+rmy*(mq
zIjZT~qy9_R5B?M!sHw`(pVZ?@uGZTru_hKtfMbm}|7IVXce;+YUFkhb#k9QKgP!B!
z<6hx=PSFS6TF>&Sh5^s(40}$|13VLUT=%l{MLnu#y{62RGwT;aY1OgKXX#{5knH&W
z<88&)&F?_7<jHMNCkuEI9y7MKP&0|uH+rLHLHQX#;8y)ZREclS&SAk2mBrKWP(UC|
zx9(}b!<V1gkwgjzNC>C}WU_*uRfVQd=)<bhC9{6-{@>F$Sxr`rqWUlx@YvqDU1HCb
zdlQ~d?Gf4P)+L9o2o)r^CT<`h)7|9mGS=!XfnmXJIY<-X><~bnrmn-h+{z@TNL>d2
zb(ib7X&V=nO>&d4oNe9v4d_~xLd48)MB97tmlC$%1yD)DB?&MPn13z|EYLGjx|z>2
z@2+}%;G09odnS#Do`i#%Z$tN9<TUthMY$U4nj6HV^l^>W6u;L>_N#h0xg*WITf@t{
zURa>#x^0{gX@59DY5V8%g-2&?N{;=ZzK`0O_<FUMK%EX*b2f>58E|3T^X4qZj!``;
zkB+=IEX2db)3+wv7CR<U2zD3KG!bRs!yO5_uA%OLF~m);*-=^nTUFz1%bHeVA{YJb
zF>{ptVs|rfTAMJ?NQ(A&@h5=j|C2VigG?=yPST~VP(DrT;iq<un^JylcM&FXsS44I
z_DP;$Qm`ebiAwD*^yn!q0pq7|X{5Fi{cN3N!aJKaSit&Viv7(+t?|~^4SMHFY!c8)
z{`>;LXFs)D9ednyBMo)%@F(M*Ffu5i``jZW#Kzn|nN%AP>@+7BjLSiJi>QnbD810v
zaKs00@|im|F#T(AF&5m0hBK?OAAg;Amoq-oP~nQhv*;dqVy~bd7R|B0i_Ori$t;OG
zB5mrz#MKTLr@jXgiu6$FQqA)hkB15N`>YR}9#ts78;Bfr229V~+S@i1)7+`v2-W-f
zDbd$-&%Z2P`#G^8UvXKN-H@{;;lFsX>HNEC4O<N~egkP>aQ-mJ4TV>~lF6)W1;sEP
zKzHR-Dg4Ko^*K#V0O;&}op3MUlITm&`xQT3hF^8P(+u@o6|dr^sVrnbSWMV!jEiz4
zP#TxxtpJ+&2<mORJGiSalDM&|90Y4b9TQT#?c>7NC&cEoutuhA(5*q*3+|_&s6>5x
z=o3gyScO$Qo+GH_0^5v$lvTj7797hIHj9DZV;FN7Fd_U{kyQwhLit&d0Ii>2{A=Gk
zo)@D>EaN2)5_y%Uksy~Pf_bbFVZiD;E&gm0$SKf3Xm7sflh<7ocW5tfxn%%h%6kL+
zT`Nz53sy>E!xT?{;+;-ZXzCyJ11MmO?OP!i<ioEbtb|}~!<l%EWRpLD$-jCIq~iO@
zDh1^Q!Pt<0K~O!fk<!KOoDda2&Y-1?+G*2DK`L+#{Hg87N<EN<XL^f#`O&!E)dUj9
zg%yI@jnCJdq%s#D&E%*Yo(nuGwlOndG0@~JU^28gZ!XZ&Eq*dn!4)RJS6pz=z^Ie2
z-$eIxXRSbGC|UZI7P;5E=X~tN$8>Q`W6`fngzv<uboX;=4bR89SoA-!=;^ib=Pwp}
zyZ!D?wr15X#T1nU7GGL(mY#LL5jj+89GQvZj-B-ZrfgX!_lo}-79<;~H!f%Gf%uu3
z&CswxaP!U+lpe|)qs3s8DsN5vG?m;BWV#_>q=Of}%tT}rt;$New*h=dqVjkR_9fRI
zQ%u{aBk1KINNQBQr8D8pJN#QgPmto0z@Fh8)Q5yQmVMsjI*6zAf;-4=w|<6Kv;cLQ
zwH2)@FGE+GO@g9^!eM#ZaipHga{>co;2+pmSQN@wDzFV#KFUqVN|{zTcKU%XHV#J^
zFT(AsVPNaCQlv-Wuy9K6e{S7+0{SPEaSK=j@Y{+Iu@dqiw~B7cCMi=%&&70&QGc?#
zdof%s*r3Pse%J@WQYj3Ie)s{BgD6B!<*_W}@^wWirU?q~?IlvZ6$ly%%l)EGk{12;
zpqFQ?wkL#APoCWC&lBEs-ux9o@n>o|wuy5e33~Xx-IMZ)wOAjQ7hhE)o{@k5P;l@A
zj#kD_@&|j%AM#atSGBA7TJZ9w$AC%pT5qg&0dp`nGtpuGlwb6PNd6ECpCYO*Q0=)0
zgfPnxU5Dhq7DH^IgLnEAG~ZzZ{UJoMV~87r+=4{nZg|1(v?z?yIQRlNgpi!5odwT4
z9$kgcx+(jt=T1J!r1#N`eSYZ}=Vs<8V?#XrPOE2UsJWcFT$LqM-9?Fhfa3l)=lB;U
zgD}TH05xty*<l$ijG=v@FlV5eHCVzvErj6cK(AL4S>>S3tFnhcBUBCD0O}6L-;s(}
zE&zKX_*xY3&$W4A25cAM5<QZyv>ziFm}Z)3muEIYbr&3iPE%irlA8bvHw05nz+ZzX
zwO2|z{n6Jq;Dr~Zy*v!hn>EhQ#1D~gTN(Z+Kw0+kFx9%tbFGapW*sEykKI{DpCvzQ
zpgX-M-Pf;uI8O8VNxo@`^$6NCGAUJ}b3Da9Cqnde;>&f)PYaI)E!rDD`#0*8*u%K&
ze~8!=M~&w689XDJ4lsUB9}s+P<uRyVYyJ|G-{|(8G>_astT^1Z*L)VTm<Oe-wSJr;
z;w<1385`XFwBc^}1c@76EhMOZ#xp|VC!^?YPZkc}O+-E1p2WlWl)`I)Cs=V0#`%{q
zkYnKe-F7G*0HLqngHhyzDn8V&po1%7%+tVuV#l=`0&K*alpP!>52h7-aF{K`1*>Ua
zoy}iGd6RUo=r;z1A^Ev#v!f5Me+HtohS+;y#7d?Xt$&E^5&CDVxvY_#{7Y&@`Y_c6
zl9+VROX>mGKZBR>a$+KoTD(quTeILEQvap2Z1slMNPcN3ZYxPPc|*NZwrxXn$0#t<
zr6sun<F09=fzR{LZ8y-_mNM&$uRA6IW8R99D~Nc^Q=+dmQ;#(>#R4RlVE6<<5f4{s
zgF{aV6Wff2s#ZTnti;b2aeylPIBLnbv#^?iYQR|5&<XZf1;;O$3#=Fdw?l9X@!UHa
zlE)Va>&OP0I&owjh130I?XD;=$h1wc0lh`-wQZ+hG@<_Ww$~+s`Q*d%ZoO46bMPUa
z)$2FBV9KaidK6l=2MG&IH3I}P0mo*GC(c_eCLy>K)EO-vo&#JIOkpBuQ)G99@XEd%
zbQ&0|H2mo-bk;%37!mv(GXK>sz3z9Lp@EK_Ciw*zT4jt8a#LR%B^#s`29@*V-fWS~
z2bl7<=m=2#GCsX~@vFWYaV<*=Wt|2*6?TJSyL-~F&PBj%>)?gPiULq5iDgeQu)k+G
zN@uPC%aEt^S|~(abT&BECFzYL;a;7ThVSd!U-ZA;+kU3Ni57148)~Nw#~kthHbXRs
z&|~@E-6tk1hcNSOPBTUu@ww0~T>r&Z?SihWufP{ncoYD7lc}de^mFiYbKE&>ZhtQD
zJV9Hv6Ny$Jl3ib~yK?Ma{}g^3Y3Xv53PKfXuJJ`b;7CW~+gNG{dZTfCHZ!y7Jlf&S
zgdMotpHy_mk@n_5F<k)$r#Il<m%OatbrMA?ltn8AEVqX&6eZjet8)EnzO=yR;9?+M
zTC((W7C7dYd|4sNMz{W;>`A4Zg2=qwqTxxEuJztwY(09AubrXBasySpsf)OOkp~rw
z(1hyM^*B$wbRmv<5-m6?NdtI|%-E{kl<3hl;aQ8qxRt}4o3c6b*>>ym0xx0?wg;lL
zyUOtn#7SLQKLLpmiUG!!Pxg!>0DfbIAXOtJU@UH9fkj;CZ*m4PZhboJ5XEj*7ADZX
zu=;2Xc-rfDp3)K2DpGk>*qQvIW{|JD8@$)W`o<gQyJCVYI69lMqbc3^?;(PoftFzk
zI(SoZEy$j0G32S-N3L-Lg~2QRP{W{#0N{5^%4z6v#mT~}e+hry@&(G23$sSMvKoY*
zSOTtq7A%B(+-waXIBOMIhxtkOq~OF*qaL<`!H?%K!NMzVmBhz?9ZcpmLXu_J(}@$8
z18g&So0MG7W)yoL!jUSlA|C5Yn1dy>J}5R+n;R8;xIHtrLeVDEns7Z!$3@DE9}o4B
zi7)2f0daGiuqDCVr#p>$Epjf5h>eA_&Qh?F9`s##{lBgx&dmkR+v)L38?0*6ONDUL
zd7X3->h{+WJkEr1P*LWSxERJs{__fzc~yaM##xNNz5Xd;^{wFmv4Y(vaAAd>4}?h`
z=)<q@o#n%V1_dj84(twCZ{BNirwBqLO<d|brFKFWd&Roi^$exRTqcU%_Oc;I*De+5
zO(Qq?dTfxDBUF==JuaU&Zc}I?`I+B0782&=xZ4q-eeKv@F!dlg<BOLzgj~kmg01_>
zmb-AZ+P;#y$SiPPi-wW{O23XtZisYcuK>7e@3RuveHAeA>Y8J<rb}=Bnmy>Rzy=!_
zWN7nsb6Ieb6K0<+_^>V90ZV-h8`0UrPiGU+5s*(ppMJE54l$-m!qj2Pkl>Nv17Zdi
z&35q@?Zl$gR@uP+Se6(jxC9!HX%Py`E2QEB)0`Uj7{=3_LZHQ;N+GtjFc>~7iI%Sl
zkKOy+411VTL0xZEag%!JM(UJm6p<FV;MtzI>yu-<w=VxgIZR>#xD-z}i^~V7VV-E$
z65K7t0t&@j(8v6KDvOfW<|JS~Zj;L;0KMDZx>rJV(0WV+jBr9TsHwUhnz@hw7IxA}
z)UH9m8UO@dlY)d-^I#HZok1}SK9i@Tl;n^2{Qxumj03c?wIlE~TVnKy(3|BNDuEVJ
zehMAjNYx05f)XSFErOhAm#$-nh3NKs#yAl?1wiR)1l?!#HU*^`^;j*WPg8cUo!Y@Y
z77B$sDBpjNWnX#Nl3+4G$b{vVq+${#{$A8yefgEpzUV?8{+9gowaLe>>9D@QEBKv7
zJFK#Ux*<a#6`=B~5WW!(957Y5F%uECR^<hazu)q!X3ENv0t)r}%L8++NpH86UJT$c
z@OjVq<lk2#K;qpOah4$rdsMd}TSw7xV^(ZQnaukA4&%jwbf7?Ak2^AFc)I&Z5;0Dz
z24{S98_={OMb3!d%4YxAX4()R>XQwndpIdb!gL?@CN{Avqo*n0%Kt#3O^>W<)e#o9
zI1?n;p0Q}EO#QA@opPHYb2o~Jl$D<k(YRn59Hs}((3(T>@A3$F0mRBAQjVzY&jLLJ
zpj^fd!7%Ao)<%_i4!oG%Iyd5ZGoxTZr$LPjFy`6>ej3>?huTeeOcG?5Krl=YdqL?f
z_&SkK**=6*zwp-8usO~7)={R<1ZT3zt+#zFu8xoPjlO%+tM!|f+IbCYeYcjbDc<A7
zP?}Yd>ZVD1q$>9In&}gStMQBy@7|@)yT)@m9(}nbm>S8<wz+`E@1FyDEgP*3r8(w7
z+#y=T?i9O6-FL{JE-71Xju&<vGMo}G=(=`ZGZBW!$CUbGyF=u|O?2>l3`h9<e=gzG
zABwdmIGz98nJi6_H4S=#6I=nA@@3@<#UnQTzK9G`d%(Y9+(Jx5ZY`li4_?tNYS?B_
zD?Dq_^PUy6ykpTmsFw~}xk&4ff?#8}8$O_Rw1m$W@37P>2;qFsGe&s)5@BoQ?6y(6
zhy|O9-NIo`r1VNX6)dR@6eLx$J0M#13ezc2x5)_V?mwmNGoHRJj?emSkU=WIY5H9=
zJ#vabp3%nY#A%u66$hg#HK`D3&ES9@j6oJXG~f`7$F+}zbgl~p+-j(x6!r294N+-A
zxke30zl$F`VoGO$=x<-X<dV0y-+2=H4(ncS=Iv|e){V11%5sk?RxMTAu)fXt*fABJ
zAOmU_p153t#Q3C;^tC)P`5>;|^D;)J8MQL8X`HAjV;=G2wYX2V#8fSpso6oh-Kqb_
z7gjxkVwYv%ZwhckpCO*Yg}N_Cl&8sx(lg)iHs}ioB07Qn_LjF_iY2*otR$8^K?hS5
zlD!SORdrnl%jF7W%qgT;yoWFsr}{_sGS}_BDCd0?Y>}$L`E}qz0f9Mg^qqZ5uL=ef
zR?9NfWp}dT=88y#fo|%>clIJp9nLrLT{M-?O{%EcNaP{f(bSG1Pl97f^-EfPztudo
zxeR3=F}f$#_5v^a4H)sa4@(ws?Rp}~cod#oM~U@|nh(uCDNVe%&44wx9a3vAW}#h0
zn^pJFl(fA2#F4n15pjZY1bSsWNN;fIHD9^=JP%dL0LRG*fHjJdAlN>MJCtpXJF%ww
zVBLBYtdd(4A)q()D>3orPV9OL<=fAl2R&IIx>q6rH5z*=2{C8n6XN6I_NIU5+v(X)
zR$6vCm+@tFf=#j$#C5+DIcd4M^OuQ$vtd4-;Jed#V3(Ntous!vvWDpA=Zn!S519=E
zZi5BsAdkfl?{?l9e2M9~GQOE0Ums&Zh=kgZxR@Go<*mKZFwQ<dWJ^BLUSD>+{6l*&
z#o&{pV^x@Ele02#TbN)y{^~j7J4t&N;foLYr?Iz{a^;b9`}1WiqnU~>r_Va)KxI%l
zFW2L~4~7qAl=f{6D;+XsWtQvE6i@t*ML9~!SIJLIRwgQ`oMZ~5Jo^_vSg-9B`$8_j
zHMGx4;)j193^9IL6<$60Kh!SI+OF2m=DXge->(LO;>{%K%00Ks9TBqKPb@e)qwZ>*
zHQOIeK4Y6qNgPUvUU&Y+u1G(StEfiu)oR6aXX`5kgF(_E)pj?hZZ^qm$KJ{f(G$&J
z;76y2N+!|78@D~^uD!#%&jwG#Qlyv4QmVQS<(Q1UhpQ#{BSPZID>}ZO<!eY>W*i8E
z{u4QB%1NXH8#-@K81?)2&UZuLCUsRLM!jb$fi6A*IQEa^@vP_T5!`aN{6%HRslA`8
z>OvO83ZkVCjLlDyQM#jIER!y3NAe>Wl@o{>WrSh;u>0CeQ$RH23epaa6*%WhhnTqc
zTio|F`6c}=n`Hml^eoi^Ec`&U`k2>(9Yi6t9UryvWqC+TlxNDjyI82f#WSM%#314H
zv{v#50n+!@7x!!&$-^)uzF(QQkp3)tonpIHRl2&a`%~NE%rP6pm%_rjxmLZi`-cH%
zIS=DzJ&Gb2wT8YkH9Jdld>G=kLNSEFG#;$q%~t;4?)VyEE$#Kr)O-j^>goWU5IXdi
zYyMfBKp+mY`}VS4?w!RW^T}V*){qYeed}-ga6Rtl_tLIZDl4fsYi%HQFX|uoZ<;>T
zmlQC+=luIKc9G(<_cd}ii})9XKrJp^t+M%39rYo;4CY9$U$7Y7vYc?ni3hn;Zw3V_
zUvi3TG#WNdHSIN1>Wkg_ARk+vKeSoQ{XR=(bP6seSMb>c_YJ<pvFyQ?w=Wi@>3(mS
zg39BL(H|b438db4lc#l)P>#UsTAEDjr})eC;raRxcXQ&L)u<N>E;^R$<ys^&A|!@+
zra6a&xwGZfMST2zJr_3!;X*797zeb3NmyRrr9~_n6&JjenXh?rYnV|#DyRTQ0b!bE
zU7u5QH1_A<a8#}{ci<1<(ee#^9Xx|FSi~4`d9%C`BMz62b0R-=h@BN=34RioD)NxJ
z<s;P4VKgVa_rWzCJgaLkSZX#QxjLpOrwrgo%AR~2GU!EHNg~<)JbWIT-25Br1A;z0
zF77=I1Gn3cZeyL9&WDFVnK)m&Kr+xx(Do+w21R&AX=^0Lb7V(np%Bf$`a{v50jgf1
zt#-@4V@9Lb992D2A+mMp+P^9fir^?bF5&Ye`zn^Za9yh*m~(94Ei0#<UOjL<)N%$3
z<q{mGWHOGNzV6CPyMGg`x9MTZa&}}CSZ^`AF^cdOK+BjdtxHMyJ858l#kjAHZKX$C
zS(~VtHn+u}sT=!!((GI5XKH@N!#!Ul>(;#Xt~Ym4Cd0yL6tUh==ax@Kldi9`_;LRE
zeTOoqs+<=-Gy<+)_T={{y*?EL3Rbu<QSkom@4`{mt~YP=ESpT_Js#)2Lt(h{w((WW
z{qkhooCD6&sJd&z6tw;mAAE~^M{nE>8a)X(QuU!U>#F5`DisX>Beql7rF4GeZ4oR#
zT4&23{&L`!r+7mc+|o<m>Zf<wncLy*HeYz)z~>ruVBY0?4?iB-lz#R0>t*0UammHU
z|Ipoqh(pb-B`)(^)y7K4*HfgZ?2XOJXBe(KzwCh1Y+bxBpd!K;<lg+xLM<+4b4&JW
zo?cD3@2%5Nx*ESRa35fI8nif>h}`5kJP)EDRjm(P*SO~rhc~g+tr5`bm&44oX;e;R
z-xHgL10iD-3VlTwKV@h4>CK(Ca_vTld>eYPeT<y#yJFu@3I;(KbmaQlq#mC97$`@M
za>I_hjvwX>YJT`(Obg+`s6PG`mcTG%5f_YT;U!A<6HEX;Y3UEGl$Y-ZuKGEs)^lEJ
z!Ypf8+5!?Mh7IwB=$23qiuB}`;++*Xd87A(1{7-dZE90-K<+p_E54p;qqd)IOw5J0
zyjpmTvq&<hf-VXY{n`SGsw;v$I#jO_ld>UpZEtVdKCGg$az-%9ieY=t2e2?bohmca
zFen3V7Z&H<_k?YG+CkKqWh9vHmj<ABOG+camfuR4!8(%KKTmzt9aDNAZjA8&MV8?G
zdEG(9qL}77k#HS9XxPB&THEV#p6~uWsEvh>{mnD1f>VrIHD6>Cn1^TFv>TsBr?GCr
zuO1vboaVNjv9EXgXemWca(U2c;k}skQ%XDAqKfzbNSH2^7<a44t6WQ9S@uG19I1`F
z3#TTRn0mOIO-1!9)@rlUbGe46%jEi}3KMY_yVHYlnj-n0YA#gEoWrA8l*7yO<MhCT
zV17M~_L7R*fbRL9g#A|;$-+dUwID}E7bCH>0>NKF@!8a``GN93!Vqx$G5LdGK4I%V
z`{wWT&yQ}Gf3t5J_TSd;Ld7$c4E?<b7TYYqmCht`=GWc`d#|<P3#rtNE3j6)jf~Z&
zmM4_5aauA#NiNzkPGxUG+rDTg^=?qcli?te;aa2}G>$MA?R45u0z<T}+!g<CQslKP
z3nLu0@78zDZb9IPE7=@ba~i~lX^#ddXZztNd8%)+tgU~}L%bKgOi8t<Qrsj!{*%;*
zC%`=ud1MLsiWka{$0`vD5BaEYBbc3tT-8K#O)%&5b*$73!ONkX9`tnE3taqsop845
z_(oW7Nrm@>fg#P;fm7F%VEo=x=Uo02KB%=d=!ZQx%i?}%*OgEXXaESJnhL{b89)#q
z|4IMf@*hwUc;9uO@x&)3wU9xyhbdT_1<}z0Q?BuuC!^rS)yW?gA~qW(Q13ig1JZ!U
z)D8V|dJ8RjM1k!4ry~AoCAwM}4_EiqVjK#SUF|dW?#O+a@`}p46#<FCFj01t&wh;^
z!5*y@<ZEdTQjUJ`^-n6n9(N*^teRYXb1v}P=?bh4&ccJ5MfHc{^-7I;x6MrZN@#qL
z3to`nv`Vh=6AG`n2}g!;M=Qbc4|u1juFZ3t!Chlwt7@qM;=%}tAs^W{K97J<GH?<h
zk*klAmtmTwJM4M%!CVeh@Qwaj9XI(I_Isi+5b*!qVM1DtzgY4gzu}nS+>|QwiJbjS
zHK4*po*n&tZ}v@m2z8K}VF6u8Do+p2NVUBdg^MsK^+&2+%YvSoYyY`v-45J|QKEP#
z0+eArXComWZP@MidZ2_*Lz6C_NfDo!yy1Cl!^~53o?MV-m<GBCDc8_yN_bQ1(L#q2
zlrb3ZL8d_S&?Tt$txZ>HI{)#G@wr|VmADs70#A-`(QYBq%W|g&58{tz4U!FUjJUzM
zbhKlMDp!5G{V`bny+XKGgO*d%Yt_`|GOA9(`z^R2_pJ}?cu<vkZFmU%De!-rUSL+1
zy&m)SqR6-<GDqHlq<$w#e!r4{8vO*=X;|CQruEz7e#z^%m0XgVlz7Lb1VJW6e#QKT
z&|Df8?lR?1;?N90?^5PfrwQrP9v!O$y}WMk6m{w;vs+amS1~Jt7DX)(IeXOFI<jtD
z=V0HmVOYpMMwm8lwY=HUG4Bth*U*G^e#YlFUmLiX+n=SQwdJpn9-DAc5(s>y8MGN)
z^{exXi85}ITsTja;@7Ip)@0oI5*eD!^hz`Q&GzV*hgcM!fy}n8#q&n5Y&@0+^;CFF
zO2SA`jA_6*AB0zmblG=<JIUVN`?CdYsnvhp>W$ayH-`LcVjqHu)vZFKkO-V|5`<@q
zBo32|h))%UXzFRiS;Wn?aMnyga5d5nq!@3i{KpF*b7CGT%*6B?REG!ux~)U4SY^ux
zmkSH7KxyBhjepgwvUE=DOEDb?5rDU(NH<B7BQfLuJp6wff0Sqs>!7RP&%~Y3X<SO(
z6*l5-&dmErtLEacu}2BiqS~%7`zJTaZ?in&Z}VQj!iiguC-w|F9?DM+^R5VW%<>(Z
z*!&oznq^?@vzaCST|<7pFj<!7zHc1Fr|e;ngjpv!tB$e-ixw-M^7PG)2$QWFLRoTk
zHz$(en<vJZm;QQRp-Zay_q+Q8oBc%E%7yMYwE5QSDzvAMZyF}QnhrBkm6tLOxivq+
zo5C>@EpCw@WU;rSOtE(3iN7YpljFQlC-L<?t(biQeV{r<Gfj=S@!-c`w1#5n)^K^k
zH~)o#Lcfr`zl4T$av(J1r7AzwUFGVd3oSA~p@qQnnOvh+XOwXn-#EXr6wxS7osp97
zDXi9hF!E)C?|qDZ32=jW26Yp1DT^z7SSUOIZrBH)5s3x^ElMG4>Fr0*xTSrij`LAf
z&UdS&ONloy74c^%t5!!&8XjG?7d$Dze5neYZ%!VJi;dPi<m;{i`|HV7(brs$s-mS;
zo@)i#=f8M(S+??D$&*ksHCe<*t&qsak|Z?oBJ(7GMLYiZf;7K>o*XJFpAwr&J&~Up
z>aq5T)HFd<<#~c4b!dQsP~{aPg8+9;<I5lOMl^?sUuTOPf2H`Oj2e!4n3DqOFX`Td
z?I1tutvIo#mteEyTr{l5&*#*mwb}9MMA>W+;`AZT0eye3;0K?&o#8>_C5`b)PfQ7|
zE$HjfWrBA<;w5qVy*eKiF$#HP{8w!QBFY+%m7ch${>YPL*z@aONe5DI4jzauMOwZ}
z$1FcBC(ky%czb(<Ne`e6&Zm<h*_d*cnjYWGc_aV3z`#GCh_aP;1!&!dBWd#H;Kz>Q
z0*XQ5YFAgXu+`Hoo+?x40!*CX68{v*hLxVS_;u~S#_CQpwT{xJ#Ngikel~rX)U$Zp
zs~?91z23J3-o=S(Rh0<GZvo}xe)2Xq1AfQ;`RndOPt84*K+zX_wd|>1doj`HxNPB{
zAIsNFEnU%z|5-6Y0{^pn>L*8RsQ|MNOCfieLBc|fT;sPQTbQA_DYN*$ys>@WcBNdr
zWaEYQbLluDw$(U4nu(sJufT`8T;`@3#~N#}&gWrkpgQ=JJm>qYO`NHr&{@QkXTNB<
zNgYwe(1l8LmtO$*)xU-+U*3V*PR!6me(ba+@*lF1OR|ZTMPqc&;V2$WnA<^T7^JPh
z_t_w6ipFZ8s&QI&eXdTS)c&V;bl#2!A0;MFC?+bgKjP<F)GmMD`c-V9`X}{a3y`s#
zX9D!+#yqz8?#z-o8R|4Lh|)eN=isY-^0sLB<2UMQBIz#$@K!;hpmFO`bFb=$xRLBu
z5!ssh^EbDpNT7A%0W9JgskrUNHxfiv_0*N*rqw7vFn;X|nO8n>BK4m8RAe#uwfJ=f
z+3xzJA=TUX`C7*K$;SuDYEe5i@hn6WaFrUaGD_y-USd7bl;$7Le6f5o<rbgK7iu}=
zcK>)4-3M;KVGJb*Q#!DnQVUOMQ=-v-F}IBy5=muGhBV4Z1J?``5cd!JjE_{=kV7$)
zNd?;HOJxe5@+@=~a84{nihuI1h6D+co4Eba8C$PqQ*gQD)gM*gvrN(c<0XyNi1xpb
zo6`z-&o}B5rH6c)aY@}=<Xe=vWO$Wi@_2i`o7=22*y`$1TL*7-PA9_8IDgc5Njk-^
z-lP2Vc*7WLEAJcbpx#+^cX{M1&J=NACrv3`0M{e644%^VX0e+ZsSS+?I($ohDX=*I
zQdi3LdA-hXCG#fb=<kG&>;`L*E7Pq<6O|q^EKcL}UnYfKuX}DK-zwaH@}UjmgWFdh
z%N~^vN$X>88cTQ3RrKBTZa>0N$vskW959~Kjms}3aLj-Gh=*8hCGOz&m4%_8LEfhK
zH-mXbD)$8}iCi0+gO~fg;eFw)0z|96fP|8N?QE?9pR~6xnWEa+I}wi&u>zemvVgS#
z(^^s&+og#*Zs8<tvU3%j<GC+$ZxrUGOLB@=-zPr&9SC#Fzx=5PB~91QwneN4)#rrC
zSfyr*lEBE(%Qv*R@PJn+9`}1djL~P4g&kdv<2_jBL9-EG?Kme#;=^8c>I3f!lK|&s
zZr0a;@;lGG=ihwkn$eJd(B=i6|D7q9zWR86Rsy*C;^UonW%CDzDuP{Hr57#}j8o;j
zbDSTpBpx9)GDXQ79dZd8612(v#;bm?o8iUT_O7nl&aDNH`s{br9FxticAII-17{cf
z_^Lsp>!(yLqZF6H+irbIEg93M@8=76wF)XRy;PgRf~py@cuSt&f0ufEoV%m1I>>bD
zVV77G;*n<)<Rdj-QZUYw&R*KgbP#iI)!Fc_(l713V%f6oyf-iUJ$weu$MrO)95NCE
zs_)yCT?Ob6IaM6}`lC<Xu&3|mw%n$xqacJHKLyUksKj9w-(_7xR{~N-%IatfQTo_s
zZ@PEUpY1EWFQ<9Rd1f~Le2$Vq-S9#vvPoizuDj+?o95UbUk&l_mj9fwAIASptQ9Z!
zr{4eRqNZnA^n{>OT)OjM|H|95fNyr8$KKDMOA0sd=}T!1J5yi&FuD5i@am!@-4i)f
zDr-;p=Vv+gy`lII#+UT=>O7RGOq;Xrc5;U64-@tFA2ff%&8hIuUpV)d9bN<>rd}py
zHr=)9g+QYo&8c4ZhV*04WGc;=T@jkEeqFhbb4)Mkx;`Al-=`h$UYIR=Ja`OiIzYd?
zemmq)5;zttiGaV2?Nv>ZTdMn=DA5;{nFDv**kJwDg0uBqHZS~#mfa1-*vUuE_H7&>
za@bja9b1f(^UpbW8YjHY_lr((v3r+F@djh;>Uus@>G56MjU4?^5u<+xsf}Z`_-n>w
zwU#0%hIl?mln|0BsZaFT)D@EaZiq#zu>W+M_zYJoo~f^7-dmA(Y-;3q@RrDm5R_}#
za9fxee+`$maEVYYk3|dOuRfoh&i-j$>yUF@{lcnox92WRv1j|d))eDcn63W|0&X_n
zUw((r@tEOmP`}UW;ZsMX>8RUm@-rXe<isIv#*aKq$3KZl6&a}&IUK)?JK`;3AMkQJ
z`p1^fcq%fSd3_-&F&TlHQ#|_x>*Up!8Rna2y7!Yb0F7St%q6&U@liwJWjrC4U3~;Q
z`uZRF@ADEik$$tEa)j<AeGfAMvi&<f1F7Jx%d>!A@tu87%DW6SENU%`6Cdd4aHNoV
zm6fDh^sNtb+HdIjP@|Zj_a-MdzU|d!NF-9yaiK#UKG3;o;59NKwNV!q*=NlPxZD+i
zty_4W1%Ti3ktZe_8T6uQv<KZ<_rvW{6qy&+&R>uFYKQEKP!7_*J`0GUoA)GKpNQeo
zZn;@hA&B_(oaUuDezX1hqgx^`%8wphUM!oxt96+7o{;|KbvhH`D{?Ac`|Hfdv86gU
zCt^uV^wL1*l#fz%p%ELDt%XMXPD(Qon{vN>SvvpVK`s6GZiZi#LP&yOcbWT3h7ezu
z((Olb=@Zp{?+NTgOlg*8&NhGii6jt%eZ5}5fB4Pyi*1xY-r(pi3v_{WP@FfpHSbB-
zLGllkQ7JF9=oT5lb%Z53;qsvQ9S+YZHxSjgiM-49rU8aT%u)`8V=6qPVb`%lEuxyT
zHJ9VLBBI29DgW||hw9*od&4zbn)F&9AEvkOFJ-4;DG?&3r_tPJtTgp_p@p?7Pa9iX
z#x$VUHG!oswtWrvW#6pox^R=({rdY#nMGV~)Bdi8OxU8dX8IkckCf;T<99|Hq^{0h
zCK5fY?0gy{%$q$Y^WrspwAc=7!%t1-DaO-RGj%InwRxnWfGh<J&>prZ&ZI~I|ER2M
zDtvQW$34H(ek-S`j3S8A2zV+M@p)Y&eoZkR(S%FD)Hc(gatLFy)9=Nxq4|q-k7OB$
zq%?-#vZVNxiR$*m>_z*^GC8%%aE5N(lfMSv{5?e*iL=Ru5^lZyY053ix_)den*DoE
zrJ!Q@D_J3P+{%5fH^{Bx&+I+z!6`fgc%n4Zo=}yZGs|vqm8moB_O`OJV1DWzQ0cqZ
zYBz?0|Fur!%|```i_^_SR0iP}QFEE;!jo=8G9Q+3fPpK9d7J%1HAVVu@U)jhrTHJP
zz~R&22w%w%aQK)+KE#i*lpAT8c{fuZa?qUMd)h*4mqxJh;}EOkn)ESKv_<}0;#Y~<
zZG{NheCPXTW%6^itKUf-8PXbqcHh|Dn*39>c2{AVn*kz~&+u+)<2Xk!ChnfjI)Pbx
zwXd@lac>{TteEaD+3)t0M%7VUi?7U>W5?LBr_cXXeE6?M#Ml&>`6To7OcWql`pLby
zuu-Og9nLuSabj!Lv@x!m_)?pa2i-w_^ss;F_MJ`R!zK5Kn@8bX{!Ws>U`dm!6|(4#
z^F3!yBBto)ev)sV*dU-;6JZSoTt&bMoC)LOa_9lJI2eIj(xuoZagvcKfy16t<4zVt
zgBbCmX(|1E#l7V60?+7BHopRwy`%fwQYIzc1nDJtMTSw7UQH7|tsn42X;rlyLT-MU
z)DzArDxV{QEa7O;^8_-RHOVlnO<-WA8oze?sGH4#W51BjdimyDz+>UV9`QZM4;k_6
ziRr@oDQ>0TlKZjd6k#c91wA3s3<aXH2#=_Xo&0port7W+x_^uxug-3wI8f~42<D5_
zU-f1zh#ym#vBg`d)=9ev(6Rc`sM;zB=H_{+?7#`l`uynL6uJ&2$~x3<%pRZ1A~ngm
zY1AqKrg~9XQwMJyPYPL*AKmP^)xP>-U}k043As-CZQ6|&stHSnZ#GP*d2ZQyxG*^p
z&dD`hk7Hc&s~O^f>hKj=+9?$CUjkFu{r)P>E{YqA9|sM_u^3TQ0j2U<Cd59)0{eA{
z$AcLKHjVN-{)(H8u{!q}W+HJPN0ad)jQ|1|5@z^*;K>m4rA<5z)9Zl&8XO4nK480Z
z+23JjDBoV>-fSrTwMJT0i$|2sachsYw$KgxYxZ!>pn&^olTZu^_^&>=4pf6|f}(W%
zJ`677jE7I=PZSGc&GO0%Y}(&phfBQt$cTtuc*?ooDO(emNC+W<3QoqxU>Rh^I<9*i
zsy!{rQbH7M1CP=ep1D~6VP03#*0rS}Ct?$cf`m`N=@q?jVNElak3EdXF8|5SoA-l3
zK1pwJGySpcem7pWrvG4m@GK!e0=js6$$B+L1nL!xM+}<Qr5HdJM#s($uMd??J&Z#Z
zj43MIK>wx^m4M!@4rTwQ%1eB^XFix#7OLX;JC8MA#_U&?l}Um@raTGp`lZ77UsMz0
zDd>7w2JCZPuM#yV(gRZtM0F!k6?^+Lo$3XfjqDz)nY-2F?W+`r9l$U*xyAaSTCh3P
z7D&*3RL@2Pdm{eY#uI0JBQvABzk@uIBl^W8tqQJ(q*{Z%OJ5H-syzv<@=ArgsfV`O
zZ^cDxn<CcV;}H)=DDKQw;iv7KzOJ{Kdw<WVhTxI+Q|hY2$&UHh$da*~tKsg@X5hzj
znm=h$5;OgONfa=CTUlzztl!=1ESvC*0e3Q%+WXYu?A2}?Ds>e8J<Q!L!JC6Yv&1m3
z*gC2#9keB9u{ELTAlXqcod80ODDhldF>Qad5ZwHI+($?0ZujK<h{>&F6k}gqtNR-0
zGRtr)4?>1#&K_CI5Z}5<77WAA_kOM9tU$BC^t&>YYkZu24i-}MQ}2=e$y-X`9Wqz^
z12;Aw*RHfS_FA)|O#Y?2xbl7C3}?Sm+^c1!B2<Hk&bl$wTTb10x-@m$5DWi6Nq6-7
z5C<z`aZj+DP}swJg<{vg;)Q1{YT)XuQ~d}oOnf6EE|5Ou(?UzoTmHQ;?46#%4^T*l
z^*fP^T5g0UD|e0QCUS<yc%M$I{YRKEdfeTUUMY=Vroky04Vg`Cjx${mA}$T;CXm+(
zm>3o<5?ZW&-EjnT2j~KFXvmJsR{_#ZQ>|TmO}=A<h09En9z^%>U7R$Yp0g0e{gWZt
zA>P#a_f(2$eq8VEpSwJY!_Ph!8pI9V%@`A|safoH{J^!@LV?HR2pm=AO7CB~1zzpm
zc;UZgKD3e&42#}X$_F#)Hpk-gEEis+5PIgnIA~r6nUT2(v3~iy%UYw7E;!@hufCz6
z<wbk>Z1vJx8O(%!L(?NV;97EkJ$tG}v$sMuCtPY^TM-_=13NIFt6Td1`t|@Gn<*0=
z{Pb6IvI_1}_v3jdAxceUhaO7*5yKI;tX`@_0c;47xJi2Im*t%O1z4EOgus%`@9rzo
zWYf_I-lJDS4cc%&v9r#>ps}5K(fhe+2l_kZMGu?Wq}GeBd3=u0E4Un2e=s3IY%b0x
z#C^L5$ClV!7AC*282tZm_SRupKTY4T(%s!ihkzj69U|S0(y1WQAe|y9-6wI90s_(!
zA`Q|Yf;60TH@tiPuKT*L=Q!^BkLP*+67ajbv$M0aGqbbvk?V19PUaUT)>y_sdH?OG
zzw*k}qVK@{<AzFK<fwQ?BoRIGp@9sa4rPns4`LmSAVI>OA_SkLN3z6UMv?^6?*IM@
zS&-ye<CjU)+{q4M*DrMn`@%sMnvV{vpHOi<%+bzA6{f{w|IkxHM#w2KgC`Uz;_9H3
z?f&qvt!Wkx!^X%7H~vJoHrZ6&etzJUi_{z^?BT`8#1{nWJ}QB-h(!Q;Gaz*vm~M=;
zXs2kzwYHoiDc(LKP!euR-M{_{$gd(t1t7$)QAI@GqNq8T&u6Wpeq~sr=g12zudtLC
z(5&rn=NOv{zV9{F;B|KKlKOzOCx9BC>F#KiNl-jai?#VT>7*eO75$e)<KHG>WBGTs
zS`UV>s3=t8zn0ka{j78rCoRGkB+3snTZaC7QCaW2_V~{NWJc=hrUswu^=7siBy0$g
z<3~=-MHP^ux%VIn1QKidZ+e*y>zw@Mpc-w`g)LW%j>V!1uy+`BZ{88y`J=eHczK}m
zu5tUx1it9v;MG~(6w`5YQbhO#FACD^m(+Zrw5TIBDNW3wlNp?!m^gvgfOMk&15tA8
zh=D4c5*Lkj+0TCdZT8(Ozl9HZywTKyOB*shMlBa#no#ymHu`A39HW^<=o1Wtw?Nfi
zRhw6?#8OIp6Q1`tOvB*Kd?^%kg8Cs_l)z(uqUqYFO=_(xLlZibyNu2!!-&vEvpfIf
zPTEo}TZzy*u!dQq)=GKqt9{Y}$g6jQXX`TV%N^fh+c*nR#y|Sgcx|31!7Cz)f8MMc
z=DL}lG1rvxYcBXC@Dqj{UcanxBTudt3_W8`m=>#gs7hYRkbBruPoKlt`t(ntq2$O+
zr`^5vy45!y-MO{v&ab#<M!<1biQ9XkOh;nNGSZoQx=cx9Hf$6q)w~_jJA_57wcHsC
zImjoIm<cq|Tzu|tTD~lvPmEHzeBedBLLgTxg<^@0K3n;hdO++_7&naKcf8ao{Nby_
zDukLoQIJ?~vPRe}k;Dj#)W$VV<F3b1LG!1hXpY|wwV+Fu38*ye2C7(qSF5ASD{73R
zOI`(=KinOw!dv4eP$|5=;G>6VOd87i;4@RbfLr4B?iEr<uin0Y@)e(0dfD=ilz0K9
zU1_nu5b7rpFDlX*4R0}3U&FS91Y^AAE%4Tq-|}E2EA~A#o^^N`s;Jq;1>Oiqem7!E
zEbXo+;k)k`-ibsyN7PvevK64|T3%{=xx^0MT^5&if5Jg$WCJ-2%FQ)326~LH?EBLi
z`3GNn1lm8?CD4CZ%VS-SdBayt4vO*bdAU_JDtx4<b(-U?`}XQbG`v#xT83j)0!xmU
z%z)%c45(y7w@tGD!yS`pYMDNCK`wzOG?PAdwo*`+u~&}vUj@WNU(n6lvP66D3gsbu
z+@Qaek`;6|<Mg_DIiYdO?;DpkP?mjK7s+$$S^ov)SmFsS`El6QjZ;@SJ{y8linKY3
z-lX&*PiW^n<Cd%4k5(U}Wo~(G{5)1<f_c&Mf-VNi2L&z_otH%=YTpjH8d!oZ;aCF_
z7fkdv#LAj1^dv*d%vg^jMMNciHIr~ZDyIp%7cl<)B&DJfy$c{+_liJ~tHI6?_JAb;
z2{urR36mi8ksP)6)p(ETBikGuqymrk^JRgb?wd^)C-D@Ca-^0cPo9qsQ)lE+HFa=h
zxBfyCJS=E3!&Eb=q?tgA8e+X2*m(by7zw;vRw|2gDCo;QiQF(XDYi6K68Xel0IFNU
zo(}zpm(qZTAwfh=6v))_4JiCErp;pJ8GeEhF7-860F{r_>;*zPeqM>1P3Wah1)AK+
z?mLnsDS@A>>l|?`-0khaKQepT!M3>U2<}|4^wlXNCEL#%H;l#fV-yYQazy(2CzQ%N
zrYEyv)$L_2ET2Y`<8M1-*MN)D=TEa0C0vXXb3|%<PTo=x^gRK&Xtij5yjYa31vu@8
z76~c>=~%u*`xJOF*n_3xI^SN)6^Q4&5l&JB)pn3{&l6IR;z?IPmBo1Qf_V@;C@OMY
zYCj&6#Vuk|jcYs+<S_KMcMA1HQbD3-!$Cuw3tY1sTPgy=P>;CFLWKY>cvH<Z)$Tzt
ztKL#itctXakswc;);9vXqU;xrYUMT=2PnA{dPbVniuF28uQxt_$PvLa)vzkG%h~0X
zx{)@^VggSgKi?@%gaNxN8=Jx`$|1B=Bg53XsMm<!<7j;y)Qt2mcK#g2ae>ZmCO!p0
z10{$Sb(~%xpu?db5S+AH)qX9#L>}lUAmVuC&*043UHJntl$u<P?O<R~1x;YjwY%Dl
z<$X}}QS*3S)$~tDI3OL>v0*`!fLtAfL1#BW%-5wgvW2TalGlP&__jO4AEEKD7cY-r
zi^p4Ng`miY0}iO0WS=UOs)06(!PWjMZjXZu+#z@+%;f(`{W)r#Riu<9Y(FK@WC^o5
zY0OmPro(+c=B_jNDzN%$_}0+1a{M$tr#^+3H6rs@Q+Ou9f`#Ah#bOfs{mb@m$^C>!
zY2nXX^XajttES!Wl%ICqY@W-G+jL!HY1-)UdbMwqkyT&)KyiFnCX;#Ed*8-lx}N^#
z8nbxsYF+zlkIyrEA(w1#_D^nysoAB?h?X~mPj}3uN4o+*HU;-Rc2=%!(P#O^YG>mz
z(htPld%g&ZC?Fnb=fYDxMn%<0_D??hyd8ez+T6D2PnVJnQZLwPZqrm2;xw?<8Jg;+
zFMnMBz91EMwXIrml__wMER3g`9Ho%ZM@!164D9f}Cg})<{>~CPoFp%%HwZjqttQs1
zONR{N4$pmNhGG5667L&6*(pXmz6Y6j(&95O97eb{CZncB<L(lr{NPVHXm8@<>kd7&
ze_bW}X|Z~~??}JvHeK5~d6CA=^DX^)zIAY50K=bs+!IEkzuwuM9pfBW(yP7%)x~4C
zd6PtF$C<oz*C4x%<y!EBf7If-Gl_}u-ncyKUL(OeeNnZ3D)ak`=gUh%clhoR_i0h2
zb?d^enKewNGIpT+Tdi!CVkr^|78xC=(<TW`ZrbApOPjk%Vtt~rPPj>wGcRK@R~04U
z82GjK-PPx4Bxl{{Le4|54>S~KQ|k|1(O0}g&v`{=;_<xvT2?I|Nmjk!wh9bXwufp8
zr7C|8?1wxAvL0&wCKzvhXLf`wfB2KXFtJmpoOEmA;0^LA)Ae&ILHFfo+5A@SH>*Ov
z-H+X6g+W>u1MKe&-lFL16zz0B;q}hk#&i=(a`Lj5T^d<`jfjsd8iH{^QG;;|acz;$
z&sO~kmL9u&EQmAKYURp6dgA^5dW3Lut;zaWr5;zm#CJPEAq=@dP(l(ffKH*?({F0O
zoBf-Upp5W{<!!r6ihf|qJ{`!8?6u{M|I)64Y#3^i54jS(mco>&9$pG*NYX9sI4E)6
z5OXc*_>D(%ULF2cSF_p??Yl1KMk0$2BksCjxhaq2<}bgCT2}(JY;XlJHs{@5)RBoH
zw*53c(bOe3UlQf)=Z(!v8vU1TY=_Af_ZuN@6Q44RRDw)4lu=c9rza|IuUo)1ObPmB
zfv-u26UO=gS0n7D2$s1--_}E8b8~?oL;@}02=+0AHgnp^?}X-zS$exqx%_;9?`2E|
zxP&yiLs#cSwdWP~vy5xY3*{X5em=v!PFnBKiJjqulP_OFI$R_fFE3$Uqd4*_lXl4a
zDo%FXci?RT+LWA+eXXciSnpZksp3i5AH}u6>q(*~Bm?Rz%CYtB^9Soj{$=$NhP~ft
zcfNrv8y?ATEq2hk%pD>g8hMbrr9!ghr@~5W+7Q>FAMb~Y@_CP=D2*27<TS(A#+6^t
z__fZ$e3fML(Ylz&UVyj#FC1^Pa`wq780!1B(w@JL-{zY2%VVczjx&wly|jyA^leAU
zhdSL;xqWUv5s{r!$kvy^*fBaPTo1@LrnU}mTA#Yxer{jbT2Ul4%9g44@cHEijIf}^
z)|GEpqu`R`tV%{kdKSs_#B2R_r#IyB5^B497cOb?_R~uB!baG~t@7(+nEIbwd>A>(
zd#~svO;@BnKk%sU-b=;j5qLE}jadRR2dpvz_BES|vI2VDw}UVqBg<g4dLPbp38Ucl
zbbiEq@nbTpxBH)(1n%OBZ*dB$W7_8}2G(%wwR8qbHh4DqMz*U@!x7XA-iBX_&IIgh
zO~=NJJzn_6InB;BT~S)7*dXltR~lh7hw0B?%=>I+7?Q|OjmN!)ykt2nBklyYx41C;
zsH4#-zQxZs@W*u<CK3A4PnBbZ^Er>OeUkKA3|tHgNh|xW|D6jUX!B|Kj|&n#cmPcr
zj4{HXQ&*$Dc;~N9myCcgpJFubRagbh$1tWm^~I-A-okZ!=N_=?`+93mzUXZ8y2l)p
zvW6|-EU!PziekPgkKX^t-P}TnDgWg$a*T!?zI4}$tN-k5SZVGRIaylTY!F?vfki>^
zEP6wvw901DRrQj|Mze5^`#}@SK9qlSmEmHs&33faePlP?bMcHzcPn1v%Fq9RS52N+
z9rh+0_ZM2C$kNtLxGd4)Z+U28@M!I@nP%VbrHRKuD?`~e4XFqGOSxlX9@8P6-pte2
zg-xGqH3{y-ZgLbu+D%`hC5s^_z>>u_3{QIAa4V?c#0N2)*d7^cGc73H8oqH;!L$rI
zMht$0fH22;)Wa2Pzf0Am+fODF;CD$fqdQG}=WNcM`dehmEi?XMd*zymNz%`<#@F$u
z5wHJ|Pn%2n@7S+8U8~k@;U9bRn}s?|14mz7gh+f`aON~R&_Rei*HU5{wC5SkfkLp6
zh{_n$WL?HK-y(=9lCI&&q4G}=x-u1Ct7dV}G*++Ak3uma$=spbmS&2qvbxTQl&Au{
zzV@`we*D%(Pa|A1{VO0z)P?XKs;$;AGmIQ_ZssG&GN_$`7A&+(p0%JsTgFI&2^q3_
zEj4S{a=mVPJv8l*)tK$lpp@=yo+mDb=!RSyEp$lYQ#;6}_Kol){b6P@fycI7F0HHU
zQCC!Lfc8B?r)!?x7B|h6Wzp{iszSzT#reg9&lO2YlbrFx)RaZEpPbo8?`v!STHUbx
zIDUq$zuF@_zDc+Bse5x<_D|@>HnT%)12yOT)@Ilje1R8EW8r~yCJ#3eeY{#Mj>Y6V
zN=Qi2b<d9IHLYGs$EkZg>N${CNnR>(r*iMoxr=*7i+ke$^;>AH5nT>@w^e*%%Aftn
zhwZBF)5vnDIlnHUMy#NxuGYP7HX(i2RgTO^lA7Rn>-#bS83<?K+ZSXyZ|WXH5j%11
z-A_JWO>!J5EgM6f>}@G1>}f(Gaf-2Q;xK@_T`m%Z_;{lLz0B=OH>y-I&}13s(f2hs
zriBpX!00)c{$rG(O>33)8@<a5ij!xd0teW+$Se!K1%GDe+^WL)vZorFeH(eyFD5)O
zm({7wvv%IZ+T1huTpFBh&<!OcKxHZ3sUZHyImGPc&Om^ke25dhEBHEbp~VVCd>i;V
zZf5`WhFlxl*I~2gTfh0C3VA~Awq?IF;zk<}yhY-o>-+IECeDYAu+U`bNRSHXJq%)L
z>zKH5k-3+}`ef&ZmP)PADKP>mL{*uJatzR!JmOPHNCLt}J7bJ4I8;XRkeX!CDI=*c
zO2!9Iktoh^AvojS$r2K(AFzjilx1|vhQAC=tqmwE8CWVcp*{Z4(N={)*J_QzX8Cv-
z@hUix+2k_iU_!P`4chQSF}T4Z=OU3kO`|{0#PVeIH%yjrL`M?oRUW;T4=hS3>|+FO
z?uSXkqUh{9kC0Vui}oVQ*g;)4W3}#yUJmaN42(0?FMpGG^rQ0Z=SvWE5r&vMT@`LU
zB#J4S#_=jY5r|El`G1qq<r9C7^*Z+{3#=#Foi}ijBoK8{`!2;vcEy+oO1SU-D1Bql
zTgAn+U?b@6l<zPqC;CPp_~{z%okbEkrvj|!_+x`FFM805i)#GFL+!f|ZC>8oXQY2B
z2XOl^+7B9`^9NQ;A9pyUc~sHC3!}FpB?7wjIP}`_(=k)u2(?gOJ$6InrF@{&5gn?O
z)?!&`6;SV}4hu7XCixcc!xKV&Jd9*^kw5~B=4^uTc#*K-*PVyuf5rO0jhiV^_u+03
zZV(R~i0JQ^7$9Mu2tsf$Ae5(VUsY3teW6_m!kIqp`qss{JGZ_c<0F&~qV9j|zK4wD
z`^*N3pv#ZmshrA|NtQ>a*=@KkrpF6GUKUHicFWkGm<vU25f`OWyNj%zB%O!IhW|{N
zyf#4hz8`K$mBsB7RH|L^YM-;=PO^z#aCUsVIkF(n?l-#U>yitkMjW3<i$(k=6N3>k
zHguKr*Sp9GOUOg1sX-pi-3Ed!vtsAu-o({T7CTGnBzbv^&pI2`F9K{RB6^5SZv?r9
z;OKmn!knLOw2K%DhkdZa&m<96EscAuUt<uyQ!BU3k`r|qmNS<v#)lGBBUbg&f|By^
zM^}?6w{1LH5gi?F28bmJE6JIJMr*eWy7_Jy;Wm~+Vy~}!6k%Bf5PZ4RB{KYG6gHU-
zS<Fnf>%-4E=Q7QCTKR$b$oxe4=AANnVV(0G$3`{k-Q+5;H+KgVb$5kj&aF@f!9Brc
z+&g|Xs;P;`%{2(+)X!V7vHTQJlBJ!H*m5GJ{lf%hHJ~8`ThdI<tzaEDKg2&W*Koxi
z!w$bdT)v1`a%Htyf}!*)Wyhv*RV1J*<b9(g6xhwC)K45uCbO=Z(~rX9ZIDWA&6mS%
zHyrY}atZo?_LTKa+D|%<%K(XY7N(oOK8SomXnJh9pCYon!GFsX{lufVEclbgV7tgB
z-@_6Vqt!II^$?<Fd1Al{UENfkCuQ`w^bXI6)f;QV3&T;#9_w-+(R50?p*z_ol@OUc
zNo<!yD=NZk5Kvxnq{5Q(QI~@895gN*yBrO$De)wK{x!5jr6x6f;!-FD%WGwf-MgeN
z7b<#r$W!>`Qznat&}H5*ssI#1|9;RkP#oI*7&AcO-k*H<=Zt{wuRN<62u0A!NH)~P
zwOM?reREsj$ux~D9qnTVkKH6ZK7?QKBGR(`8d^Vp9{a8%FCu<CWh7v0x+sx;8iFKY
zq!qC#o&3fDHvHsSYF2|?+d_pUGTswB_E6(63LhDrzhQNId=K#=J+ev8nB0i-Kf4+W
zx!w!+b45?0=b*?^)mZdy+J+E_bMDLL(lUD7m(W|)oxZz1eY&O~D$cZVU?u(+DI>mt
z<DGU*X1HW)@fyA%iQgq@JAoJZFEjJ+m+o!2lKGAj<||lQolmHsC++t+PFl_~SlZ}>
zbf^7EBEKCXGigg)Jqq~@KdR3?(m0%%tJXxGJGl~8gNDe&yu=I73H6R|>P!+@mO$;q
zvWZ2fRL*@eS)L^Hmb+Z>8mNt1;s#1SGPv6CBVlV6gQe=aMftq5^oy-ckSQjb^bbiH
z3(^UKT<W0b)t0T1mj>_<rYJ4ycGFyO$#6<B%W@VT=S!iSxT?c0IgL8KpHgU{GQ8jT
zCR{G4G{J8D=i;8ipBaMV(AKq419&`|^p#r)hBuF4FWZ?L-Xvy_#_d~Q!FXV|rR$sR
ztNErNdQKgEZ^ok?yM0Csir7TFE&dqlUc54Ir0nh2;fF#k6QyZQ!XG%%;CU7=T^CC_
z)FfSK73s0z&Z~E(7ppN(q!$dqIbm~`KxHRVhn?#2<6Cy$Z`!r9H_XMLFtspGp%HoR
zt9tlO>|LFUt2g4;oRcA3p^-sf=Z2=?N2X48np|?Fe83$;=61k~{B$?@ovh=r*gF_>
znRgCJ$S`~oo!v!dc+>Bam=PHgROLT~=G`=@$(>vNPWF+q)Rr121ZN{7Ug+{nReLFC
z=lS5{oaatX@Ab8ikC{ucjz6FXs$LS9<lPfz4b*C`5H<5?Z7Q9KY(bDPMthQz2+!89
zA4Zz5fT1TA)4n?Ty?%J|>#51h{Hs<15vO^-Al1&)4O8riX7!iq^l2iFXnGo^tJP<e
zIFk|=>`}V%ZJdW@v&Pzu+Fc1EPBuc@9k%lr*oFq&o@L`(m4=R?6*^92Wn1Av+RF@I
zbM8E1Lm+D3My1F2qF(+i@u85AS1wF|Ep^+3917}4MeKBZoQ%5Sa1ylPLHO|}&5Jjg
z%lAo2V4$Zeq3wA<BSB>&t38(HZ5aXSkoKq2TD-)x1F93gWyyGvA=~)%P;K!%dgTb1
zXYLgvSpad|WO~!9Him?>3)epcG&>QG)Tz6w?+>z(_bE2iyF4|q65A=YA~f=VkXQ^@
zY+S#AjA)m|0ffI#A{}i|Ib!$n3pLD<RPED%jeNyP&rPfB#v2DAdrJt;vU<WI;SJ0r
z?ixL8t3`zSRq~Qd_xyQ)W!2b0!G`O6n4*TQ?&R(_rX98T$>oEz{#E?Vx=?8o=jsTw
z96Q{A$q)!m$Auit!#N|4!yqPq-D}J39oY_vhPlDN=u8nAW(!Sx5qVK%<>%s`Fqm8V
z@L1m>#ZI!l?W@}iS~iQtbrdM*ZAQ1XR8pEsBQMYP#IY`OL6Fc$7s1@K=)<E|;of<b
z`h~&II%k^DLg~>YBN=s_!Tv+9Bun4DoNy##NfGxJLYA%NtZ$*s@ROlPgmm`;7Zit`
zZBTHM4!fkQV*JHlar*B5r)BYa#w5e8<tq_*&<WkSlboEySFcoO3DUFYOX;5(TH5jL
zttZzRMpk2vDs<M$-2{>LSi(AF`w-Spc(2AbeTgx1L_!n93%9wIWtFd9bd!us*-F33
zH|rt|i8UjtZIvp}nfOSK+!4|4utGIt^5a0*gXyVoY5ZJyb9h*ObTAb<1)=+1QD^}h
z35{%T@MJ?sveoJ}hsW!m=wdYZM6p%5TY59(Nm_^BzoSufg?~X*D?I9_^Lr$NmE>?u
zu<ed_{j^hi>l<|$(Jh6nMzTm&o_V3oR6Zr{$t1e`Sipz?(yO^N9#lTb8y-3ya;UpN
ze*R)z^pf;DsaxJK-pzy*TrJc@YORO^%4G>#4&h&V9XIi!X+?x)*Sn+TS!F$=<(=Li
z$Bc<G@l^Af_wb%h$_gduZ(cPedMDu-@jfFK`1DNuGPzUHMccaEJMjV^WmvQv|3sp!
zVYetp{POqVLiRg}gKs}*hXU0O$td#ZODy6`mh}sDk)!8%PTLd|s+H?b)_8TF%KNBW
zFk?ey$w&_{(0pRc=sOFtJKF5O>T}BYB?QIb37PvbD=G0M%eShx)wms=JVcb0_1*W0
z?P6l+<4|ar^SUmT9AK8HVH{Di@A_J0wmD^5*Vaka3}w$n$vL&v9(X%_n==iUs)buG
zb7;12?`qn~+HkRY)N@pRCCo(-;NhX<J5c4x+0ghn3q>}?scO_EW1%x~zM>{jB3sgf
zRT~t)lh%v3`>nfRTh|wEz^0<6-j?+xgk>2i0V9)bDEUFhEvzei@IiDzJf6Q?zzIW?
zB%#W-5Pu%!Fh~bef+J&8`j9+SPRi{rFF;b>9|cJJKFe^`=yRl3*@1<7CAqRkztB?&
zNS~y0deaV@ueSeA(w*ZyUHt*eJyV~xh1mYcoNoN$&AHed;=CaY@z}JWn<v;8e>*>-
zJ9SZ97H&TL9?6R*rKP&D^eXe{vNe#<+pM1nZLa_${jp}a&+GMqD%|^QTYvt%C~eED
zP{dygu9<Ci8nupEAEjzHM|3W@=ioj<Eq=o_cF-&yV3WYYE|)yrH0kR%e6-kxmKL@w
z2>rKIWYDDST=JbMNwgl32WAXet(d}bOB_XcS(3{AvSTHpsGQRG#j@gQ>b_bmIp=xj
z^x`7+<1tSfxV7t<*!6XwKc0@18g&nASd)~@+EfeK&DBF?=%#>^a-CheIe#IhsbnM`
zF7*n&PBmT|I?iC7gzv-*t#3U8TPpg(?A0D;yv}~PvH4voo9c#V-NN&Ab|4`#@zN55
z$7+XTw%c(_nKutEPDz#O<Oy#R4X<*2wRu4Kn-YT?#2P*Y@rf>I$*W4fQwgmqLJY-7
zr?=70=g}dbO*7e%w8TA&%a)>is{BU@!yr?y(HKd9F+&hzt2SlB-J3<R9}BBU5FzI`
znVu!X`*J0SGPe5}^28#u4btV9!h-fVQ*;$^ynXX%+18U1p<f0NXholics3vN3>8IK
zw#5#j7LJp}FY18gbpMfpz2!?N-j`uB^vBMhqA=lYgVlF^X=nr9H`2#*0$r#~QY)<C
zD$q`5zN=|Yvf=E;7D#B?KEBJb`ut`<G-Lezy9*yKey@sUj5Ozl8KTus4h`umM7*Vb
zPzlUSi+GIe_$bN-D;5ql@$G=h1d(kCndx>f{J!2RK{M6e`$;(6lPj+3pK!p~h23o9
zx<C`-O1AvQQz8Ko4Mxm4s!~Bp^VqaNE~C2sB60P1O%^k-<N9Lu8LWM-6wjHX6O3UU
z-Psf9PVgTGCHI$4O|p&$-@aVOX9{+ie%Cy~|3?He;*q;qqA*(g3Zf?PvG|M$z*-AE
z%q!UnAb@jV_h>L>`u76)lG5)BF^St<7g;((U@;_dTDj*fuC$Zoihj%CG;LIFs5Vnu
z%&b$;UaCqq7McD`?f5zAY~Wq%@r^^%bzKqf$_Ejx{C8}^>uq6fN_8r3={76hT(2=w
z2hOf>>LG7ac-2hZu5$Q`UUy^En(X)~Ol&QRI$w2YNE-8nQ8m4^?wmZiVcJ>mg2og%
zu_Y)rPo~72E}3%~ut3x|H{DsdKkr;rvHjNJV0kg^k{q5g2ov8~d)0;`|MJsD%9;+p
zmy2;p>(I2$_KYJX^RU_Ken*?OvwhKBTAf4T&dh7cs^qcz2B@jp;?z!{S@U9oh!ykl
zVk>s$AMx&ve%A+L->KxBr|eMz6Hte6zpq{#vi7Qiv@jDzhh*-`?Y5c0&WyJke~VK>
zW0|n)b7N*)67i#du<}n}eMZHtC_K~qY#luVVn|6b$waTjl%z-ebIaK_{`SW#)M+4?
z&g2)r*le*FR%kLmqOD#fH0oM@G4B>DfLdS04!J;Xu?;ZWo_BwFlTq7JY&OHW?Xn<g
z{zcD*^EX|aM=a~#_e-Z+Z&-IZY#?els#3y>KUPLMgmF?mI*|U6+z(ElA}QgL8#4Ak
zrJm&C$L0EmUabx-^7P7{pJ!z_O<otUaV_+mRNJc0Dg$4s${_uFDgw`|wDeG%^bL(9
z#9xH(X}>^eC}Xd_uQ3|&3QX|()luVDu_jAyEk-9u^I%^@HCtM+Ygpa-jzUnMj>vD>
zlbd#4iupdB<>XkvAzOEC&(QmL?1E#hzMG;i>VRufqf~6}r7}u@#C|2RTRvEYHjLv+
zkQjIWHa?N5daDjK@QYG0wAT8(3_GrGyO0ry%}=zIM{)6BqAx)~o$c)}Wo72b*gw5P
zpK(twI`0=N{K-rFi?HKG9FtYk4;6EED=l^Kbiu7+L07tnYbApRU8Nm^^I4HMKT|?7
z)~c(Zf0L@&t#gW>mlZ?*#C2bE*THXAwh7ObC1Y!DVVfIvxAH{wctJs!|0lKt-8vb2
z{XqF2v|`m$Z^?_0CVu);#$ZI7n78m(vQ7=8d^RK8X!%lEaEQ!7B<)Tl8QC0idjDA^
zIU)Qa=Wa&`!QpzvBhF+cov63bqw;uM=4M)JN|AKWtWb$6cCtT6xwn}NA2@6sC`I(~
zPDoC5sZODl_i}U=?zVocK=1x9JCNQ(4~k(Gc$Rn+I!L|5<tc|_9Zf(usAKK+iJf2J
zeH91dk{O+Tefc&K^+^gH+2O`g9Bb3x?jLTjkts+z0lhx7c9W2a4M$&QOP9$t7&2m6
z_TGEzdo!<eU|{w1oCbQ4TiK)Q7g6b5Al4B^otwQr|86wE1LB?(J%02<IBrr}*R4DC
z1vPz5YNDih={VAg-KF4Q);NkAYSFLk>jTk?a?>$$*pme|CD>)V`>3so-k;KeTtd&X
zVrQc5t9ic*zhu=LK|L)-#vyjH0FRtRsZ>_<adl;?FQvvcv3`~J;_>?V`MUYb`lYFH
zS|)6_&J_f56QtFiRuny>LU&EjGLK&9%oy4aJc9mIrAwKyyi%XL(WzP^U8Rt(>i&(D
zyIsTK`?}ru*1oant6Ik+BKhg6IV}otx-xrSOb0IGn=Aa!VSleRm+`h8ZB(+U(muGg
z#{9?*bf@Cl2|%X<I+)<m91Xr8f`Df)YQF#IYWL{A08(PuhXj#+839z-)BPBHt<B1g
zW2?TOFS5qh&2*im7Fw(G4TvT=eowJ-+_duO+uWOLlWKVcyPVRa;>>)ZSBjRVxS!l;
zGAwg}vU=_t8#_S9twO?GG!-?5SC&gg5L1ZqIW_8WRFc!nHqaAASdDWI%sQl?bqZ0D
zuam9vvgV32Fe8rHGu)qN#T-8Ei!RYk4=IQU@S0M8GaGWZevuCuNqEdO?D_Ng+qK^O
zVa-Q(H{Ep;rG~~~7Rbuk#wnNNdF)wqM<;az&s|y5Jw}q{jde*>V67093=T=dl`3gu
ze&x3&6;^|W&gBo~_@@`V)GZe15eF~I6nqLCO}ER|UVh=onY4#GWO1n+YTOI8r!N%d
zzX}bx8bz9E^N{N<eG_@g&!@$L%A7}a@apa65>>=tvpE79uqTkD8Wejzei+U^lD1nq
zxjd|E!49&bw}7bC#t#Si#N50JU6bN>4@*R=bWBNV^x;p=S0TV_kh>~%Wk?5Mh(WQl
zF8VW50|I&})H1i#w#AQf@&a*k!m7HQr6TRq8hOnrDlY=D4PliuT%FCOxaMgMu-7Nc
zxAh~Ln^?LRcA<$`WoNRm!>k$p+kOr?ze`<tYh9n#>rYx6Ji3oP^a}Fz2UU*a!WruI
z!*C2C?{!_Nu~KNY5ke%y^YblAt<dUxG|*Yaocsdq)mfq4ZLX~Sy7eY(SIe`-YEbv8
zM1{KNVtc1g=Ni$FyFF1~uUdfhD{<_&FOO!X4cJJLe7JiTbyAA{xhpnN9LlFH{^oGA
z^Tn37?tStUPhC)DS)ZKqjv@ug#tTNj2z%oLtrP+pk&E)?lEQF<{M#dln*Wc|&7G@D
z8Q4N42rg98LHW$E%L_Z{$7hCsx%K};dY>u59*yc?H)?g5G|9@Qz;rnGWGcBdpFHOM
z9zis$5&T#S6#?xDnl37&Q-|rpXS>hr_aAXFEk*CmaPMl2#jMa6uHP(DNdMB(9uA`q
zLsRd9TqrZWs?uS?Srn88-Y&FH%Oit;nbpo^Or+Rui<CWCF~EeL<-kB!HoB}YV7}p(
z!y|0NP))vesT=6e(Zw<WpE~^z8D(m$B*oIl&+?T^qtHZ~tNfpC4u-fP6RyVaS70#M
zDHT~9UFN+j$Whm|=9B*|d>oMy_qSl={Pkv^3AQmN36U5r8wuHy)l9;nfOTbRr3U?!
z)8H|BZIX?FN$-Y5*@u{}3$9+JpCJ;e6B<=JlT_)?3MyIH$VsrF-L~`;|LEf=ppQxZ
z(Z_}VxB6IeFG7n&vYQxrb&>L-ifO&EhjJO!eMJ=0BrQSzSn5z}`c}PKR5^->wl-92
zgMvxw8?g*>Fp7Hm81K;`O4&5qLBPj|!H?#g?sC0K{OJ4nFzRHf4vx29KbBb@?lu|l
z$>igWZ?Tiw$ZwPoE}E6D^@udRd>Q_XcO%`Zf}-*auX$3vKO5L!i5S<Yt?y~{Qbw|2
z#6<Jvkr2{A<I3J1M=M~I*2+{@7BdpA3-7TEkg3M!ZtIQKM;xcx%!zmotW;usy7Ird
zCSJ40ep+7f(dm56e8hJX?!UgH{U?NZoIL$UUOf2lr?AlxE#K>UZ8S}ZUq|WBMMuL7
zq?z#@6wEvAS`&riMSiW^m>q=Iwoxd!bGOSnPnMu@c4#M-<dl_PtM<{BFQEt!E9JEi
zam(z|Xm}8=kz6W<$U>^q**x4V5C)F8NIdU{-+rsOVfygF`CR3Zm9N|a+Ux>3ZRr1E
z2IF%p$b8~q&M8`xFD-+xqM>`ttMz|6c)lkjrvX8bMtVy$w9r$(pdPe~{`6~kIt5Y^
zub2Uv^OIpgBpo~YovG2mhW7SSi8r`xeX_stpu@}a+c#=~bRhw(2)I?4Gn;Qb_yj^<
z6TkO|G>kWb2;aAw$D`5KDvD<d7NP||=)dkMz=+QJ>pE;j5Teoo`KS>c1AXYh*>OsN
z>-f?O$!S0KAj$H7YP-E3X^0_N`KHgcSY%O0GN}0x1RfNyQ%ntH;F>b@L&MLWFrX>l
zcC!4>InS8I3iI@l&&xA-@V@(7F{jC3apF3uF&D-Z3g;J^ov*41oL{7VAl8YR`^OoL
z5gRy5i73wJ`cB-vyR@sSd-(|>=^}2^`*!%iSu%p`HYdV;qC}bb970_9fftfP1|_8R
zgF8IW;SLYQ*E#c=HnO_FSibrQ1(+4A&#<LX{`<y~T|(`S3I^z21;gG~O$<IRt`8VD
zA_qP$E_~bw=jj<RZUya1>|P#l(nQ})AF{&7Rmt0}C6EQ<5`@gt5TU}yeVnuC@V|2F
zTAI23CKYq$j-yT=X8d`c1b8Dxw|m?Dihzi<*G}rJG{AA;pfP*z;ehw0iBDo-?|*n~
z)oo<jl6@UiGdkdwGi>2@s&-P4`J)jyfhe%U`I*?zW^Xfz3@XQk;3hQH-?>Vr@PAG#
z=l_kO%Tc-mt(I03V8=hNHqs-hm}9+AH>Wp$YQ&=cu99Pdq%zgLgfsE{jHB;MT>aXa
z<S|6Lh<CigfgEbm@YTt@u)?pys?9{mqvp2hRfx`0eHE7j9pm@a#?sTS^PJVU?MC5j
z_S%iF$jRN6#IN*Hgl`1OSnKp$-gs~sH!8c__@s-SuMSXMPRh>!K`7|%uNp7`-s|u}
z&EWLs-!|OFiUKXy*1&*Vwf{fKUpCXI`gbnCzYbm?ijsZtJhT_ioSA1!NjB99E-yJ$
z>fMTpW<sxczrFMju8?Pfn~uESocZkynkwFrOX}}Seu?{9t}iW-)u<3)*dTpht*h&8
zz2P9N*q^)EGL!oLY;ce=a9w%FXx+L6X1(6ao!Z|csXvJ7Ptjx>P}nnqbaxXk?$Eax
zSnUz`p!(p-^smc__&xX{>@j3{-_;G9ZG1H3YDjCEQ1L@^YA;v!BcA;e{((OJ4O}9H
zAWho&&g{oOeewu+yU(HPktuIGDie##rpadt+hfz(P4?3pOeDl=YbK!v^QNMwt4zm}
zqa5o+vO6zg9IUNBJ#$|;aGk#KZ7AjZW1h~Ed*;gNIy{gxcsB25t<vw+c8;riXj0mP
z2N?lT&V>#(k^tiS8{ls356L#oYi9<!d#p_~E-Anu2eSr)yr@BNsPBzoDng@YNLY_$
z>3jf@Q8XMayhx9K>#QNd-Q2Z`hN*|MD=(w|%)#WeCe^9uwg+0K8&!*}dp|dlgyKag
z1*1-VS6weo*ec8W`S%<LeozE^WQLoCea=m-O4VL8-cri$q~%OJqDYi~rp0nQGp<CP
zSyfB@I5V)$Ka{UJ5bVswG{cw)_`Vx=RjrT@mtv*^^35EpmC^7^fcOfL&j39;Uu9RS
z&##PL!}=`Q+M=ADSF^jY7+O7I#q<gWjlc=xso2!DM}%t2-@U`pv&ON$Y_GTe{$6+O
zaAtGRF@it-S4Wkb3M~dkPMA04I$<TH)@JMlX@7Yj8B}|Arb{XNdG_-PLv&fNF+-*8
ziJ8y<F$72u-J}V42w|ezHCqtrl>*rIY58@O&e{nN<g=A7-rl-VWN6g9n$lgTE{299
zIju@KE_(fUnQx$j$(PW)WgXP^=jsaY`!6QxtsFx#=yMw#J<>~5a$LAalU(B*o;V7)
zyx;hl#+KXSi0y5roO9D*e9x`5X*(QnmT-NHHEo;y?KS^g(|h5Pwf-{@^r#9%sO@+V
zc(8dOvo*r~2tbta>&}ZAKzs$VT56Dq6?CcyNIa!2{OSzE?4CR$#<QA!(=IZ_O<;t_
zyETJG@@PV3vohq>X0kG7w&k%b>a<6W(C@OVXOK8I!Tp?$=SjppL#de4{q!nGf1m@|
z4(liWUVX7so-2OP%DL)dnmHvDd($*l?Hrh%dh>JWmX7#uJ#Ln43cYxN(P6dUqpHgM
zz_Nqu<Ro^mvtJS)pD!r=b8mw!OBVnaFIw61PP{&O@MHfEO?8c2XYCsPc&pAQ##2M<
zlver0{M;3I0rju`*a)A5gq8KOaY(YhB{b4_v|hQyDan0vu@pR$2x~3fWU}kyE2aR)
ziA`xm{<A0GIEno}=d27@%@Aov;!Qljm9;SoV}0x(q$fa}5ITqI%bc|FZz6rQHZMI%
zdd#B!kmG}8-tA8K&jtowz)A0rQC0wa*#lm`ddiE$`xt!0YneR$^<x7x(%&NPPkVHQ
zE_XI*;Gm#bL!tqEV{_){8DG-_8m>PoS2gx2Wt%)889WdM0lzCs>KBX$$qg>uZmvUw
z)31N)Fk-2cvv$Hqu#l>3{t6emuat>duJH*X!Vma7j|e`1cIc0Om4SA6@OE#;2&b$;
zI}GJ=mS6C8c5;>N@8Ip;QQ|v(N)S0G=RsP5ul?n-uM}88CUW3M6RI<$vpE4`bcFFT
zWg&PBj!LVXXBZx797gHMU{HX~MtsMMgzK=CpJNVSEI2pZ-cAxu+Jdprl**Zx;bXl~
zs_c3WAL}_Ko@3m<#`=Gykj-P4&jPLI6PVZ0Ydd~2gr-RFW0MdJ9aiVe{?A7HF9oS*
znE~fx26iO>&*XpR^?y6ohpU-r`goUrv~Dwh|NLt-D<Uv!tI0Qpud#!^N&}XBC669*
zj09$cpiR^GD==AWNM$b2&K7872;oxaaIJG5TXfcg$Eplgl}-6;-L4_%VeW&CrYDfs
zg3hm1;KP4?FB}YJAj~^9{Zf|hzt++nDgyen|H@Zh4O&fm89HJ=mi>;$1J|umJxbI!
zMDSra0aFPv9o~NeqDp;32jPHk-<>{#7Vy=7eN0CH{x)md4Vu1!(`}$p=Nmg&Gb&}o
z07x{#{lq`=uU_HbcLB2F{5j$Jn*_eWkojAFf{w8u{P2#?4h>fS(=mKREI1Lt@PNFu
z)W$1*l7N($!W?pc0WB83-hLx{I4k}&!v{JG73!Pk|1UHX-OXJBG&u;sK-2=Dng!fy
z--!X57|}fk00LU5cb@;MJY8wb0)UodTdLlbT1=Jc*4q7gVh`Z2M4q#h{06gA!wvMy
z7Sv(^@uU4UxTk&mr9R_7e54abV+@{57x5_D1^9JvgvxYr--cuWMwC|rK#ZHdx-Ko=
z-2xg0xLS5=brtDU8-BZpUbFyM4-F9ySaMce_#~*n+%eD<hG`9;e%MaECyHMtnR@=2
z!bksF&v@;&Q`tChHe=2`n9BW$G6s}!H;WU2v8MAoeg?>#jK$Y9euvV+tFe4*e?xFT
z1C&wsauI+)&j?_jQ*b3NrOp9dYUEi5z|<Ts*XIUxxIgNYYDmij;<O!wk($K=OnLqL
z>m6?UKh5>vCFCr=@M$6V$c3|E&L%VM`n4bv3fP4NA+Ob@fx=hA@l|*?4{|`rm>nVi
z5>xovAmJ$?LESgfZ3hr>#r`_@1CrPBLU7o<x@>d(nW;tIy^%Br5RY!4JehOY11P4q
zUJ%3O*1@5ywF$?o1PH5xw^t{acVYljQuh7ATg4^-zzKm|C%QmzUJsIK=lJdm3j;iP
zOr8+0ZLnYokP)_$tS20L6>rkC!BlX%0IUT)shL;4PPyJofC{}FNp*h*>es&jytE$6
zldh_I_PTf0Q#cH+Kx7X2?tXy{agAo<dIJuv#hUsY@_!52T(ndxWCTD^19U|8aX4Zs
zKx|)wB&j6=>`6{eXx(1J?}1oyzJ_48>bCPl?v7jDr^?v?3{mHN(bx5AzvaMc6JXRu
zPb%A(kEc&ce>|H|msB3k;Qx~*FAH;;{UYr!Q}q;A{4}1*edrm%wfiAr=Mq2*+|-f8
z3%#W*8Z)%bW|9rZN&q-|hPU7H3LF5EK|=ulM7ufIJ$HCTu81Eem6vMz&pwkwlXh|+
z?R5Nm9pb&wq+llQ!~AOgH4|#0#4U)df^p*K291w9wu>_tm;*skvbe2$FOR{uKdD(p
zC2lJGPKE`!vZ7XGt8bU03n$4X16mi}bYoWqppi^c0*Kq1ll*j1&#E2VyC=^hd2L6i
z6j{d<`bywHjraFAu8K5c5de1cq5Pm!8|`V~aQ`kQy;iBE24u{v)mI^o$`CIAnG4in
zD#bK(nEjHk-7Dqh)NyxSWy)te@~#K<?86;>1l2znvzQSl%ldfEtj5Kv=}$oSEqUQV
z8ZpIQMI%oe7oGr^PiUT-Oq1LZWaQ?cgBz9VjnpjcdkcW7sr5Q~1=TJ8+%0+Oe#lI_
ze?I<#TlrwI?N9C@(i1C8Cs4|sdJe#3VE%Ygu?xybeJaL$|B?W%Y|!&IR8qi^JfLK2
zCJCgV^;sPW<-et%Dee)yW;_Ke-7RyoP#Rv&>6E^?Fn9J(+CURE@iQHAeurcRjbJ&n
zP@J`4@Lzj<Bp8%#kz_feXTK6f;85!A{x{9nabm|xip{qNfe-F-KS7l^jJ^VkQalWL
zCA>a58E?Cv0H<|t{?TI{fIJcCK{@{Vllb`?q!+c<E6SviY&vRR(|H)?BTI04v;l%+
zR_dsINj5V8Axp`ZjTpcY-K8`4I|_45Wl`d4S|gb8<k_07DU^!6yl%oV&jJYN{Xh!0
zBMS>;M#boICWGDuZ~1`g_DI!?hf(2^^vTf@FSCSx>5^Z&<6Qy`@drN?poY4`XfOuo
z&=h-b0GR9B4$+lJhJ{U>QECn-T<cH%%^_Cj*J9xaf5std2~pF-Dfbxox8CpbLApNO
zW&ohAZZ*o(0+X!Feg;Fq$e39o5ZlolHC>q_m23d%IEPOHaCF~Fqme+QKtX7#xev{Z
zMp~$!D2LKY)N{F5KeDnap&830B&DWq4y7_Ah~Oc}djk*{G%M1(7kcJ)ETh3gtlGt~
z@s-i4#-|{<wvN$5xkhN9Nhhx{pJx!1Cf!De<r`wx`JJrJS{){Kd)PhlGaO;t9l*SZ
zKH<ueVLL?x2~}Rq3O@j_MTy^WKg-Aq)O8KUBAl5CiFpJz;%j$Jb1pQ|J}M=8b;h@&
z)~Tk<0OufP@KJNA?e8T_VAN3Gg!aUO!}gi1tspZw(2j{%Uvypnb51EsKMKQHei2Xo
zgDrGb%1dg%%QXq;vk~YEw4hgfVD!>V+rRlCyeR8f^wMd|3NK)IW}(r@?$$D~enHO*
zXyY-{wtHW|{uavS_Qp}~=rAP9U0C-gGO2xZ8xh|8_J+rtPLdAdcU}&_fkoU<7Br~x
zt@5SfHRyEd+yKPuI$HhypBB+G9Qqev-HGU9h>(l0SMj(g4Zj`XCW=^%9nXECi_jP&
zQh?<#17&3T_zI{Y$S7sLQM}<_nGQ5sOdADRVibII=2B?IfAYWO7(8liSnH~Qp=R|b
z-YDYVr+847)CX({OKnJ2T!R#=)BxUockyr1*d+9cCIAj=N&%z8mr-#_76T_&JA)fq
z+mjglc7Cu;N?*`9YgFYK93dZD14L?G?_;YH36PPikrLmYby#kfEV6jPw6&MG)t)y(
z|5R{M3W$%lv^m|te#b+AB220mz_s?Cr5|;EA^$Nzs4k`nlKf7CO4E5PgKs1u8Tv26
zUfAzXMEz6&)9fP4JmineCR7umF912K;a|#uTIxIFKHYqiD;OKU&Ede5ti>98sDeRq
zuujBSUU0WN?-2zT`kudON%i2+Nb1hP&jvOLLwG;M|6!BRqkRbi5H}O+huyfJC@8y1
z51r|mu8>JhG~UqWa9=ifji%D!ce&5z5mLyVGxpKldM*N`TdGWrhTN5mBYvvkeRnp4
znG@p{(Ez-Rx2p=IPl&LDeby2b7wQU`JOVD4Q5Ey>R%Go+nSS{nrRUJKeE&|WL?!c^
z&cm-%mq!Ofm$<hd4@AhX%TI1Cu?`oPJz3N2_#ndo?{^C;jDB8YPGg11qqO^GAul61
zEruvSzLYAI9*KtObaM=sN`(`x2^6sv(YBg2TgH%^P0=@F+nK5M%=gk$p#hSuLUU5#
zU63GR$d_z5gAWA2MX6HuzYSD1x<~0DkGWuRc9`s0H~THz72KFe_}k^0nqQO;0NM>6
z4Kj2Uohm0}`B{L(4|<Pp0ONeb-BP%eA4g=1&NeOPEw9|@)~`b9njiu+SBglQfZkN(
zV~}*1hFyptEiL1v!TRixH`0;ZR*L#B8Wk900e7fbzJm`8T4LgbkAzL$+^Sj2!VN$~
zkP7cEA8Tr?#am;ogpBb;5=K&Ap96Fkp<v82HBAVPvqe@gbSoq`WbBaRLGW<ZF!>E6
zjwOKZGo3pyA1(}iZ}u9I6EP<omAWnRb7KXOap@(@OMNOJaN|cS#8*#jN-4X7thZt}
z6fi@$Vw&y0P2(MWRxar3ln~_0d3c|6yq^N2j5E;AJgH4lU+8i|Y75k|MdB<B0n)lo
zm5Jf4f`n0_6%psd)o38q+hxpUV=ILMmw_a82{!OFw%|sP-aKCK>(8VYVvgXXbPrVA
z3l?GgibW+f#%AjGnXM~_;7_Xc;AiwvZ-AJ+uuAWKnchC89ve^JuA!i*ft#-)Q;FD*
zs2SXWC5M}KjUbJ9wA@`CtU{af!@!~|)^*IKWp88rqv8PLy7)Vpk16CY(4G-VlQAdJ
zNEj&j;v6YbvgJ!g+`L-um$RvT6ZY(!{438~I47g(u!;lx(B%JS`$L=Fqk;@!JNi3+
z#6F|`#Vng?rtKat+^6_2Z{e>wTHctnH{#<YSg<74vUUPSbOrz$X_A!Xy#>%`BdI95
zYIbl#^hR`1GSn%IWRmT&0gsEM%F8_4JmBibl;#3~v$+Pr@=K&B#Qv2VQa_p=ZBPj@
zyAcPOH}XDzD1Qt$S<Y)TFB+WudXvLNF3Hb2@xGM0s&$4f_E4k{ODOAUg#ZN&?fjGD
z@E^VWZ@K-hhbFaw7c6_JRffsj>om_mtizGUn5y%b@-|2#ov0CFZ2qJ^S0eE(rW0TO
zn4ZiiPfbIj2;8lvK0mkm_bmTwvhaT}@IDN6mJ};~q6t?)LtwRgzvS;p8ZNI_>BLzY
za7Z^0dJJ@*3OMep0JP_Lt@WU)o!fN7FCbCWMmuT$HsAhn`ry{I72Mk*k}!hUS8L?U
z%;Ht4QfxZeapZe@i7fxHVycJRu<)?$zvuHW@93Z3gpf40;t41jj5z-JRSwPtcq;RM
z`%wRB6K{n?=iXO32>{(0CHu<+k+6cC;0n))TzMzrf70|EAzm1chfd{p*h&5e*Rd6R
zl1v2%O{SVYXZ&B7^5n^*bUv#-JmIap5G<yDJ}7|4$4f10WBwy6|II}uF3=Yvsd@Py
zygmiM>;7KNdWrwG%KuHaR4@Gp8C!s)H4d;0V;x)gK_VB@|6J>ySSjd<S@e}z%P%}7
zeuT^eSQ3nR_5^s{@{f2oqdBVL0UCU<Gi9YE=s4aC&QP;e%gTR(j&O>i<sNC3^qct-
zduN+kpg|}R>N+lE!1_08)|^8K0Vf{r4g$gH%T;9<cz;#$+;vebJHyTyv=0K22@t%a
zj8}CSaa<Zr9eprKRDRjeuz`oLmobx=BP!UJPh+g_jt=w+@XYJ}@yutuj9ga~Ep}&X
zc;Eb$A{TH<%L3i1<%95Hlbf~h&2C-MyX|pcs^s=O(1B<b4^k~WbF22fUD8sGBg;Lj
z;LG%<#ASLf?z?x<!=;|#8ML^CVg0F&GWqkm<kf<8Dg{D1ToTgx^rpuvLFU;{3H<Qy
z9r&|A8(7`nUIEvb32IPNpuM4*Dew~R5Ceba>*>MLk_F#Cao8tdZDu1FY>?<Y!UNk(
z%J^M~+^OG#dp#^9y`gjt(tWc(AQ|j~qen3^_!9Di$AICL6yO7yRGo1x&ErWCI0X6n
zpoghxWyae2dOyJRw)#o!nG8(9XFE5p)XxFqF5&t`;@0ij1#rgaa-#X<zoG^I9dA)W
zhj)(j-q#L=?orUm$*3gh*RMdk9)Cx~O;ce(>>bIlLxWJjHMTec`K=#_l6f80wr&G^
z<?4~~1w7z^XT=Ws_~+&&>Ek1ZY*0v6mgK3?8Vzv#Ij*)#aue?i%9r2_egtQ*1)la4
zGUzU>RlJcE9)ZE2zws@G$2^KZS?o6~DP=EE`|f!ELi_6cTd)0QUlha`qQ&knSBuBl
zD0o}CKu1KNpWWBsy#Tn+!OH%VW(C(Hyx;Zw2jCfi>#`Quk?=6rE2I&ZL3AXIovb%q
z=p(Fit?%4k^;IT?OUc05LeH~d!vuVljc>Lx)xcK?)vOihg|FyQp6x|3LC`xE54~!$
zrxPXWsTBMUeZbG*wjM}UW-^hN@CBg^t#&_eD2N9n%rLJ64`&Jv&euB@$%J8OmFYmg
zm74%f>?mp9Qn|+P1^Bpq#QG+d?9?|>@HopPJT}Cy^WKsb&_8SY#1u}nsbcSZ3Y;Cv
zM2ULp$_(*nTALf0uAkw4$+MqS3;;%Iz6{fI;cbM8*K!zF$k@scka{*(YkYSHEy;0H
zK^p*=e;AQb;0Wl4d!k7!>Wi@JBsCFbg;+9rfE~*9taQ5CoLjp@%_IF2g@6;;Q4^kq
z$q9fnR|2)nF|Xa0{4e@}htf3VAOEaWa*O2>o1B-i_}&j3FXJ_@6|VpTfZhH+3W^gr
zktOU7QBGn$27!kDUy8KU`ldrsdgFvajyQk%YZHn$#|+AA;nDS4JDwNK96+BvUpa5J
zgU=IwrKc(n#8{PD*wM2Ug0qH%k+BqTGaS@p)lUEbLu?*@>5jlnjp1tUySoLRze*~X
zX#>I?Fa=X*%zTIMMv_F}hFJ?v*D7#JQxI{4r1UBb=;52IrNRH^{1{%>_zX7n!s?Oc
z(sPGNr5u2bDh$MjLBbuNb?#Fn%a_`;y4gJ3U8I6gjz#&6#LX_>Hn6O2{=5|Y!w9Bm
zsOqn5`;V^rG0D=x=8ZP%!|6S1G#oAk57h3S{GWp7gr8vbq4GKzF24g2-zo4I5RO24
zjKTRIPxI~h`f>(m8sFV#(l1wE#7_$ERMce+^pYlvLZZ%pB#8iOQ{Jxj_EWmHE)#LL
zJ1u<E9Fe$jO4ZBo;r{kFn9SF-^*4BJr?H*4(gY$TYhNeS+;P$VKce0;EUG_R168^^
zq`Om^p?hd%Xe1=05d?+~>2452x`&n!X-R1W2Ka*_(lSVQ-_1Ga-uoGznR)j9#aeHz
zXe9-`;_zd15=@d<06jV$9;i>Id;ThAC^>({35yI9_=!*2)K2oJ)rASHu0|E*j^xBv
z9>~*?%*k3;kgm#T35{uF22+3&fK>zXd;(Xv{zh}au*#qM*)KSKC&k8h`%4amFpYe<
zhbtujx1ym8IG+%nzQauhN+oq*PGXs5cYu)XaAMUTMH%l=w(PGvJ6l`ol{)#f#Z5oo
zcDCQ90Eil8KN)M>)k+v9JtuG{Qh?brMS;K7<pJfUwV`MvFJ0ij&;0kTmvdo=;sO9p
z8tK0L%@cu1=res<Y|NSZ*Y_Pb-@WHrwZ8kpEITdkVvcfof1u&%G;LcA>D0;*>?>c5
z_K^D5IIx+iuk&u&5-16Y2rlkUr=>Qm40MSgSHo9FH8gL(_RXhI<CGxR69o7Tipky*
zx#}b$OHJ1)mjY@Bnc6V7pZ4PB-)|fxKY<$)MWu#?3hO^+W~wT){9S;&em(0XQp-1_
zz(^0pgyFyoy0fQ%SrK&V$3P7NT8~;+&j)Zag`4xeo!IcVD;@2<!~y8oo=1I{;{eEb
z2!3kPO@U5+BQ_OCKZ@MPVbY7VIlcqHF!JRUd>XM4OKJ=euWc<g%RfUA)6>9z?}C|P
zb20F#zkX5hx#~ZBIDCB2eVSV94}50+&SV@%!CwP#zLf97qVTm;@FReA;yhr$Q9q(F
zMG8y<QyPD*KFtVS3Owu{vL)ezE14_<<@2ArIL27ug&PrdY^&$jH1wF&cOHP$&WL_T
ztz+|A1)m%4E;Uy^K`R-L50|~3z!t$Bt5jnMHVC=iqBQ;LQG7dH69>Q`Y>`Pxw35(}
zhieC4d~2T>ich4aD%Exk&F3bg>u$Q^7EiUt%IB2M9pHe*FE&hNMFaK;>+8B@0HGHX
zbP^_8NVQ=mB7_SEoH?rf_3MAeHbJaLyEa04Sh}H}4-CnbVzDw1Ai|M7!$HSg(@q7{
zlK`gL_H=`X8{-YCeN|*H0d@@}<ni9uX{JP_SVlE1+zbOH%0Zt|K<_5cYfAFKKPnAW
zWm$sV%dC}{2qERr`4x|N9+U60JjxX$y`=J&9F3%!)j$Slnu#QTVYWA~`21|&@nfFM
z00Gax9Z_vKo~E9Oj!OXsFr54V%mL@bJf;r4b%R=ynKX-3qsZ2o5O98xMtlGSja;=U
zissoT(u8Xj-r{jL86!pcpSGS)y`JX-3z5__;3)u`^CLim6Q`L6)m<Q1Uen|tQo>pp
z#n9H7U;$@;*Mi&`@xbsTPYl})U^6bIVE~S)qLHvz|G&jCd7u~%wkAvBFDh3bfIT{@
zNK9&Q7J1<cV6ZyYikuLLX`}RWv^3JQ!?3FNdLKocpo(BRtxu3+d6MgJC?*t>oN#bV
z=JMADnV4O7_+7`=hgW65P+v#}sO4kkMx^1jVk~t$vo2CTQK2dLY({OWi+Y+x@IslU
z*wEP0zg+*eNGCiIw3(_zOCfs~|1vzX#P5X99HXIM_MI5V@I|oJg~b<oO|gC>V36w*
z`*Wcl`-I@wL{Gid5<>YB3;y4$xe;IM|4zuy>7Ym|1s!=Kv}OxFiw=V)-18a$?DP9s
z07F`Tvn`p4^#7{`03g+oSMJ?xPt*Q*t$KdFiDIT5$pEDiUww<LxE|(-CSr${kYb>H
zBxMz6hB0{AXDU2chSJ58OTRyQ#5P0{eHir8`=VTmD0WLVi<F*O)q`RxqFd2)1s`<m
z@mI=+YxPf8NR$yUxz(AqE{`?Gy1tJ5><`7XV!&!vB9uWyS+%<TT!{LNKCd4cM!b!|
z^gWnJnh2Dr=BVMdjJIZ6rjuyZDAGB(VCy8txR6)-bhp*gEpPe)6zP-vBNqSpE9JrH
zSMLrLp($!;pR&@qT{9(;>`}?wi|_M<vIVSUe^=vl7tRG|qPT)d4gbFifQSbkF@}bY
z6Y_F@xs}K9i|+j7Dx*E%sIyiGIz0ONge&9TGU*$nBCP2d&Eo(KCk-@CUvQ7D3-=8+
z)2lv@>VCn~X~Jl)sAwzVF|?)4q>ujmZm}N_YuB7GK|TV?5cJ}3Od?D9DqD8l0$v~*
z>qD!{B1Hn?c#IPPN(GZG$Ep!+;sGOEjZ@7P>y#cH2ikWE_8NzqV;`VF8s1<Ut>PpE
z*RK2PQQ7GQTegHY89v{qE8h3J>_GtyoK!s0NFkjFJj}cfRF%G!d1RrARIWW%BPUNh
z%@4jWX`Rhne-I?0?|-bb-GgWD`}2lQSyH6eDSCLn0Tae_+XVSFP71-C$GWdc*D2T1
zkoFO0R~so(OEL*T6t{Wbm<tKnEP@uOkS9)&woby8(|IB>1I=I0i9b9$n6YocHhu(3
zll^}DaQkxe@7tb0gEMuNkvZ-fbd{L;hJ$zroUdHu2rEMgGXc{zi7N!?Db5SsB^@pT
z!~xwq1ql)+uq;m|hsRgS25JK#1;Fz3UyW`~@7wNkqsptb5Hc<@1h?H=oC>^@$b4h6
zmkmnh^3<o&+Zu_{kfuNF1HVv@XDYO5)QpaQAC(bMwdt`G-!SFX0Qdc40Ik<Fa58iF
zVP3BfG*nTdla^5oW$+VC3_*~IbW#i5y+`AHsF>wRx`k7LjlgxAY#uJ5h^_J;x!sFk
z>jxH>g)}vKF0Y*_*itX!qM&aD|8E+iw<>9%E?kJ=45qsu9YNqHQvG8${+o}ZCMo3i
z20+z3BJ*@8cfZs%NddTy7nw$oaAM%y^cP~x8BkQ)abi=1g2oS};RArQ2nGG4Bn^fY
zqpRSQ`DdG`@xc?I6a@*ZQ{37SDpXAO7xXHze}2ZVU!fJAhGRv?N)lbxH>vnHOLkI#
zhI~Rk;yx4@4F1&0y;>m|Wc~@x|B*y4OYs_uYnVM&n-GoO_9K=M&TBVReP%24KrjhW
zY!u@7)LpQQ<3Ew@O_%!i;iwnE=kvzNv|;Aa2u8L`pbR_+GMDzBy4!EI@-M+SdFD<h
zB!G_lvGOW;;~GVsG)W`Xzp#<D=d_2Y1dUj!IH^qFGPLsTZ<<KK8MSzE)?sl>8;O=D
z;5zwrSQi{p8hzV59THU?*Z*tccCa@i=%Vq5UNd+A&l;oNuI&UJ1t)x%)PO}b(E}~{
z5O=Au=kXyhyPp;s>q}*+s$lLU9)t2*pTjZ!-#`PFwG_Y;n*T=;PfvhxbP{Las-xAP
zE&AM$-B#wqdwWyzX!(UFhI0YmX%xl@Jd5UKqwMj^TBiN)Q)%rr;uO!WLkd-eT(&T%
zt}epUQN@Qj3<1qmvb&*SLE&dJiu&tH!k0$bpWE1MiHTWZV0D`6gFk5LnXLfYrsK$-
zf=$XUF^w9}0Sj6-70OD@k3^SnCW$32+%IRvU;_-mtC(U0d5O)Vfao`mzxl`I5RKJM
zVKYFYy@8E!RZ2vW|EBtVJCIVhiZvva{3R@oQlV4M5Nl8onQ8axct;^i3F$fOe@b^T
zh-HVxRFsCZloyl(wX`g#!4xVWGJFgS6_5qlg)2sps>hA~QkUv4hg@pp0kB(t8v76*
zEwl1ZN<pSm%4h^<uwud?Pw3_SX24A}6x;J}f<O;Zz0%6Y+_c(H`&hnMMzY4V_{q4L
z64RzdHd`5^CYi=cDwK~;@`{fyb)1&QTv=dKOXVGSOQq3IUXBOz&&AJ9;cDD)z%)a9
zzP;)sNzkVX7`GIL&#MH!cel#R=5(^}|NK;o!EQ^urcMWKU}o4k^97;~VHQWl#t-hm
zo?eo)eqOe)g_xzxq^a@(r;LXegn9{KGljP)t#rUv$_w!1Lz402a>ISmsMH*V(a5G1
zNv79xck~v4pKo%6r_(S>g8D9UEb<7Q;y9AvT?ehsO^O3bdSEE?KXp1m#!cQA;af@#
z(y~CA;R^as`5jbKVgD2u2Hcq_WO<CTU{_we4-|mjCqaU&v=QN4+@I)ml8(n1>4ZL-
zJQ0{!!Z!qBd65|h*>hgQOgXI;f)f!;M6x`1g}7v>Il(vm714UAnI=Rw#}q%m4JH(=
z6mp}@<|DZ9C`T^RSkzRzBK;#IJIip2ZDeb472y(WD>o$Y{i?DlsY7YXblT)!Cznfk
zCV4Xg?JgOg>3P~0erDi>O*}zX6I3-VE>87nAj;4dlY+ly#ywU|mKW8=mSaw|4|}28
zqoHyZ>xh@%TdSEvYx*nmNqA0A#m3GH1+oBlW?UA}MKzbqD}!|C&7lBgeFl@6@sLpY
zn+c}#%~$vStE%K%$?hLU;ZnvKs~_A`#ahQl&B&J%3)?ja9L*s<RS+FOyJxBMu?1Y6
zEx?xI4d<cMw%~p8R{=_-y%1U76B{b{qdT-babnRY@9aSF^4}f^k-2R+K89j=@JlW@
zD)BKGwjBI{WTnWCml<%c7o$ps4bqx|qO?JS&(N^S6jBj=R?l@&q{*>NnaRG#Sbo-H
zOX9aSP37Y?lU&*|n5D$R1yhw>aLUJX=W(*MS0?#U$;&FN`<*9pm>kJ|8-IVYnW5X!
zh#KU5q+aB1mtJg@O3@-|(j_g+8FN`iU|*6d(|7hQ_2rd*5f}&GpR-mjl|NF-dw+TU
zO=Vy6^SIti&TzkehKM5(op*g%g3ojoCCM-BX!dtkdwXdp@pO+LB_Hoj>EKHDc5@2b
zDM+kxe3M2<e1#u0hHUiDlu5}ta|?gVpU=wwsZ>4ch)iR54~q$YF0#KYLq++hg)Bo2
z=@TnzVQRr6VGv$-<&ESo#o!w=p4uKfqrJDQ{XW}VxxlT&_<y*k@Xc<!D$@LZiNwFg
zSV`Po_pu8~V9-?lF`dl<$O!-<LMZy%?75jvhpqf)-G7cX*>BkIUagzAJ6=+I_SMh7
zL3Xt!BX<954DS-=(XBU}-9g3QF9`zVl=-Xt+21ooO1f^oJ1@0atH2G5TWU^u?gOpr
zP9F}9zV>$en4n5R)M=1QfY$C(7@24J)8K<`TLZJe!xyu+`ssgR_xJX7gr4wMbMeqR
zbEjQV>z+VMvF!hOtr5;>Cm1NhF|UaOqSi5No~=d$)m>;V&>xOhK}u<NH<TI}b-n^|
zA;q@g9cYS}y^*_fm6TWPWMV!ecc#z0x-a9up3Q3JUvoU5r}BZ%0d6!;gpxc&<q<#;
zdc#9z>2;BR1|qnz!%>G42)o0z`pOWM#Qb9(h)+ov_pFRv-)1sEkwR<b&#%!Q$O=EA
zznRb!=-!OwGmt3YjG*06rZJa+pAHbpk>xZhKIplNz!UuG%XF%A3Ai~6Xw0x`1`?QK
zgKJlnfXy49x03Dr1OWZRlD8KwWS2^?vU!z%wG7-R(iL%4FRAuEE4PdTGP9;9(R7oW
zvSy=Py9N+8*<=S8oQ7gR`PY5SODb8Ssi2r1kW~erB}caz_j50KI?1an{}aE(fD-fg
zvmib>r6E2#m+=P;EEt{KVj9TSzBV0K&YTx8w|JG!c=gt&Cm2R~1;?0qp6D2Ejl_kH
zJA^i5>E4QdaHput2&@!H0V6lL(l!Sgat(QS0D&u;cu2d}@ckE!T<+&XtEzwR#di;b
z6n2|+Bkl~!#~0mv${-a_uL~$$uZlXpDPfZ17R$Q+W3mFG5vtydR49&+(1c(`ap_k`
ztQXmSPkv;EF;b2EM!Al7$F3Y{JWnPj7U{1*^sYht=VpMri27d=CRG4qd10=oN^GV&
zLduq-nxj`ALmw~#_hS5ZcdFN{46wgC^MEpkvMG9Yn6m4|w+T|ND-HROmlUb4?%n4f
zI?14zB-p<oXGLdBqAxd^0Zz5kg%PGl^N!>nZamL2{hft%P9V@YkyJAjM7T#z<B@hz
zo=*c{@xSE`0M8pd7j4GZGCx|H<a$!!k%boyrF@ZmDFcik*!z6;58gUP1m*(}g))lA
z#E^h5X&{aR6T`Rv0pa&AH!yehN~y;IeR0OmtcfshDiNn*{S6W0S+w5#(+b`vr)t)m
zh<EkYT=OrtIuUc^%5QuN#|1!it4_rVVk(azktZc|XN<@qV`d%806FMdvI<O^@81Pa
zIqGYFS%U7c*11Pl`v4kaQFz43&h^2yEJy+B(5p&LJ40rsM`dwwoE~mfSZAr2z1z%r
zip8K``d3R})I=E#P#7FV{v+x~O+zJ?Hz9)80lOiUw=62kkx@D}aO4|_2J*^-2l|)f
zEgGEEjVUNri-przul8zvZdRv2dWp&_Ivv#G7HOqYieMteVKP%VEM$bAjtlgDozNxX
zJ$oLG=Kkr2NPpD%9)Jmd*hrDHbQG>;w9->iNLE0mT1R<*JEKCu%FuyMZvZI064Pc<
zstA}_ae$M)(1U9dBB~%u9)k!Aijb%VV0GNm4Pp7mUE$v{J@OuD#4$dl67iM)VX)Jm
z{rhe3Bn9Q3nf~qYGcTvW_EX}yNx%id><w1g6T%No2vwDn`3a_L=xW~vtiUHRcOqp;
z+}UZ<H{u^Uv%vl;ag<;>h6^$cWF8z%V-M17w{<h=iSRMB1N6pp&%G-j7)rP1R;Mq&
zIvF>FpY|yfNBob_SQ3^9RAKk2ph_M{%f9fnGI1F}h*V3IN6lPI&*I~VSBR`QoS7pN
zu~e?(PgT9trFS1xtx?#-JxyQLBS)H*dIMJK3y-?cI}vmJ&3$TI5h9=QITi>YA;P-)
z`8rzDo($`SV$EH&tjHZsIe{Hc#ZZ+FBjpo9uBHgQyhps*FE+zexqRhS$KT%PXz>s!
zH1s|M^lfqaGFo8u0vzUNB0R}(=`iKE4~S#9w|X;yzXDJdmJtsFl0}R;`%D?X-(<iO
zW>h*4|HxsjNd{&e5n%|~QmSKY?)oUq%^CX=On0ciIX6vO@xCcKbMFd;*ghKKDVSl(
z3zVW7URYAGEQLc8glIjKIU*Yo#M)+?{lilEVuE%nEM(^blO!W|+3MwQhlHRFP^W8!
z?%7%Rd#)arxG@{B+@kBROo|Pln|79UnkbBP@+W;jv@rYYYFA3W^zWYN$_2`kacQ#J
zp=kM&rzZ3Ig~L8+Qxd|O_<rj>dsE1qJ!Mm7n{`Isl4=|^enPn8$7W}x;xdBj7i8jx
zQ)Vm?zHoLv?~LY4TrWEKocg>-TX475hx(talr`U{JntVvGmqk;5L72KB=3;L=q_8M
zg+^mp&i&?q6<G~gixBG#azt&{y!ZcY(3@|Zbr<%-Us=5x@L+4Y<Kv!c6xH6pcK)6Y
zye&4EbQo$fr-C4yHxaQ0!lLc+WAtK#-5qvS5+ErJ1nr7W;EneZ#Hx|T+7R^9H%-RH
zI8SU|-b;Z8F(Mhi+dq^_J{1Krfn!_QRtFCufW)TLSq2`lBxqX6=mu~MmjWm#z4Xf@
zTIDpXu#uZhY+9sBeq`~xcVIdps~Lx7<q_%^3RChV{?P9kteoMD3s*GC_~=`B%GAY1
zkr4{!#1umk#hLx?9Mt82@Fj1fDSK_4v>vV5>bkeVw0T%2o2XX&x8FciFy!c5p*$n@
zpTursnDtbb?;{AgnB|V5d#xmlshHSXeyR(~8h||n{`m%Knxj|`&R*Wys=s&EtjWFB
z;NEI}lN;SeU{r1=cxzpK^X%Q@{p|Uyp%Uz6yV{bvWSJ?OL!z95_Kj|^z+=%LEkkdc
z;psr<?UQrQwYQ>nx72U8eyZ$==J}QjBN&R;zO~Mm?wbVbmUV2Bz?J;?xpn&8nBWmx
z?@<XnG<u$pp&v}v^t2KlMe-8*S8`ssk=~Ad35luD?H9k9>`*sTp9#ebpl$0BP_46N
z>EBb~T;6%-kj@cj8?<o&2e!fGXn{Lc+5buG?ZBUab@luLRRotj!3r;)D~HZjC#aJ|
zNVd%R&rjUVdr21@18D`_{P_;yD)TRyEN;=kty%?ov*;?*!Yjy8u~!JeJo~~LrdM_&
z%t7NZUQr7veL@=fM$_9ou-`^mKkzIX3+u<|o6OID49PgG_tpf&bYMD;!N?>y6ZBpv
zoOtUs&_Y{DABI{tL~C)O96vRMD2OA^Q{TjSuh@Ahx+zqV;9dYEeftD$4iRbYRu<^$
z;E(kHbFPrhAeLH@d=4I!(BM5mNen>$|K&Zw7RNCTza$e2iNwF(k$dFA5?wdz4+FPp
z2SQHe?TKuZ!~U$bzo6O(LuJ}XO1-`X4h>{~es~lJOqeW(w_lP9!P&wa9^r)j6*e{X
z`?4C;Sj?$&!IPNAR5)Uz@;p@MBa<IvmEfD`vM|n4`49iAmWXgjGiJKJC_O8d@mrSv
zugDRH8}@<{uORq}oQC^`jLViB*qS9jdIR-$UW@T&YCg$X&%eK#wMIn^>#Ct{?Uj?#
zwS17#MH&I#PgNH<ci9H6*Q~}lX7S{&p4*(equI+_5rqHeOnr5s!ZyeWeJk7PX~@R*
zu$1(>6<yAw)_oUHY4Bm+!U??BJikf^;=pgwslJ?CupjdMwy2ZDMM_Rwv*N;yS9C1<
z1DDq#7&rK`1@U+k@TxrWvFvwJUg%F;<mV4#=?P^7UZlfUeI8o<8W39wRis}Ohg}dL
zAI(Q0Cf@{Ty|tjp@TluXfLD9lWA|`-y;HbS_;D_$l4<PaEC-&%*>8godXn>hz<MK{
z&vvcm0M?sp|MwdkEY>G;8cCEL!9qq-Su%rae_C}W-@aB#DOa{j&fIYak-LoZmt6L2
z!a#=j!jrLjR_;?W&j1Yxf7#H}-W`!{z^~7rU{q3eH}g7u%%urN95!3lsCYrz-@qBk
zs=(>iJSmp^9-Hq49%=aC>Hs-{OIpC(`E9)#MES)6Fb31|XktClza`k3CA<TD7SEH*
ze1SKX<IeSHq4-Z}gc$~m2%Ywd@Fa0k2N3O+yyb>v5YUqwNu$4^_|)`z#7z3UJOI_p
zBthfkM;I*sJ0;o|0Z#RLBgJ(c(Tq^z=7S{3vVx$RrHc#0{<{-COUuY8F@tsg&m^QQ
zXAcl(0^Jer9y?5UyL42c?o46l-gH}adr))rn@mB0eKO8fmnuXQwNZRY^z}stfoOR<
zHw&cYsjJi1g6h?tEhN7G<jqo>7p-rhh|KGK1(Q?<YN}OpMuw?Z`agV@qekyvYQ*aM
z_PW(n37Cq);jGxj4aPxD$|06tEQ4pA#||{?Av?*{X9}W{wo4e7ykk#^8@Gs5+2uV&
zIm`@E-b;?)DX!}6Gj7_@96=RAlw1R@((T*wO{__~SezYPwv)aS?#Umqy}8D=C-h4A
zV{0gG3@{IwO60!_PuXEOcRLvQUX%(8d-CiFf@z}b|3x~@$@2tiUISPt`rOrPrp=5S
zrfP`2ja`aa#`WB7;k!vdmAfu<gpOTd5KKXG{mWoFF7BVE*nCzUuwtlL_B4H-tP4Jg
zwi*FxlVv*Zp5v#3hPoH)b)`|3zQ(?fRM>@>tciU1QI^ix5~FWrz!B+7UJ1x<a9nP!
zW9YW;-s_<a=~qRh3ikJqK%m`W?FKTvQ-<WP14WGrYcTf6$ijPSO?pR(u~Ko!M*i*f
zm~fX8RPW%eeCJGy7!9OhdvoI)-P>(w7;7Ck$8k!|#fc{XX|?e(tm^W14Bk<}cz&9Y
z&hc2Ok!5TElQ_#5I{D14ug9bVMfso3O<N!T0@_sCL-I2PR>#xJBY9dw!iot`j{@RI
zU+;isTX5DsUsuz`x*wcIJ@}ypOzei5H0cD0H1LjRN-ViexEde!nXP)W-DktOy;h^i
zSL&{RJ8qql3oRcwHUJ{`yLUmt?S}Kkm)vVx<awzMkv~fYBb7VBaqwFM6^$>-WQ9cP
zgf2O5a%>;2<bJEBLL+l5%GW$nkca$>M0L#u>gX77)+;ZdpwLm(#%f2w{#~MiE54Zu
zBI5CDR^hJV>~4FpC|_ZFL(d<1T2-3tY27W`lD#K06XA3Vz&VEo(y1Gf)g`Qx73oOH
zm!BH7?5vvJ$$S6@)$%Jb{vY$|)kx0*d!X{BIN#sP&X$@VAJ#xVv9qIz&*6g&?c3CS
zwj6ok6?SH6zbcDv6xK*}*9`KLXp6}J<3nO)b$FnDcUT`!ZEv+xm)hj~zVjHoy7m}7
ze1n3Gfe|Jc#qn>a%i=UHH$-DSsKnBW6XzH+@b4|TjuZ3+92L^^{lgFVCs8!)l*Ui3
z57syC(m#fszz}`Iy~I6;m1>m+25j<~TcLO^$5}iPFILjA21%XaVN3nSDLGHWB|=b0
z^ur~(A@1;Xa3-SyzOZr9LCR83JQ0be$=+yP4JbYKLOyxoDavBOgX7M*>)rixfyt~#
zZuS}T7@U#p6@%xIg~{SSb}p^BRC{?>iey1GTTf9t8PPE-b$0k`w$qMc13;$nhm3h$
zj=#2%zBC{4jk$MQrW3qqN4;t<){MF@07I7q39ljBBA8Cv?P#vwp~m;{H~QdPK#RyK
z=sZx=k>VN(Adfa-y*eIv#}iDy<^uhW#f9kBKW`)*cYk~Ugc96<NkFe!7De6vZS~Gz
znKw5sck8(c1zrh^iRDQY!V=9TL*rDY6Of$H$}Oon8tvkTc0^{wV$CLm^5k(5awk^&
zP?e6v$1B!|n2*>d6Wz<Ns_6EMY5i>jN=$Q4MNr-KF;kMLyqJH+M$sjQXL1*0uJ5mO
z$nlF4jFi_XLOao*)BeSJ|NH7^pRdNAGk?qOl9kcuJ1K)}$}(M^nXr?Xz#s|ANYX*l
zvZ)b%WSxFnc#9z+^+>YJURHL1h9Gglkk{kwE(xVZ6o|9sPw|59&#|eRig5}k)*E~c
z>WL$!aKV&lLFx|Uq^f$BvT@4TDydAt(_+Ir`iPyWB%V<RjYwi>YePDypv7M)uHb<B
z<jdsT^>3G|@`8E68an4pFXcJO3`)u>Pl=!xE8PWG$6$oR=db6R%y2J2BhvnZGgBCF
zXf&lNeCCAi0NO*}5D7az*4htxdRo7Ow+H(@tlHk@kJDo9h5(9NP_w_4UQo#gynr}z
zU%a{F8{k;mja3J=0HLSeFVJ0-_@7QS{zT988zAc{{e7$RhUzy8;&)1p<Ykg5@ZaC$
zU4M03(S^ykyCxVDr0&<@WK$aOk+Q<NkZ2pe2p&%DquQ|djCoij<?+#S=<0NtqTL2b
z8|rk@Qv|=)+l0;42}e>s85X0qdw;&!OWkUy<u(T5f|yAk$L0h}kxk9%B7shRFSdre
zT}H8yrKoUW<wJ#56ou0<ibuM7g*dOU>NrQ;MfR*R&5ZcuyrrT7=`vR}n38?pE2(+U
zH47Q5>1i-J+^Y=$B#<%bpc!)@V706G)ag(l6X^Laf0V4W6bfm?)oRL`{fp&Htmg8&
zIs16jhRZU5N7Wp1m(bOg`p9|P;pAJ=wPJIfyU7iYD1Ok9+TUax0Mn6P_xCqorSywq
zMg)&Nc@KnGC{%|3JVs$ltB&T(WlR!HzqAtk8Y9^6E%`|USkbQiLTSOD&rO(TS~~E;
zMgYZ!ZDI2NglQ@S^s1&nCAnIG%7X-;Bw0zcYk260!7=y~x721)Hc?0L=`4dKt2uWi
z+Ef`qS~zOlDLh@m&Yt>uK*N`Blc+~J-#{h&aoq7d_F7dxLCMB35@7@c8JfENup${2
z)eUwR)C@6H%3}1UhfKdvY^owHeI1K0q*lD$cN^U(u#67ZHyus6Om_U+75vs%X$L(w
zuCbC@E(6k7OHG8S&aRcWx~Rv7m+;du)~Iu5hcydvyghuiUGz}wcaji_Ia+X-?6fXq
zp}<Luj2=C`*N86u;jbUZXH_9!d1xT}C)(dTf$w7r^t^q_5Q^Cq96dPB-#PIHl&Fj>
zde2IC^CTA_(ur!S1ZXM#h}UJ|4b-dzrl>EK+4qbbB~K@WEyR=y*8~g;Bb4RDO}&_z
z()>Gg@Y5UO-Uf;-IsjKNy~W-u&=ZpbN>Yt(WdoETiVjT`nca%I$}~p&JTLGu@x{Z!
z@dtRd1vD2@yan=7*UOLY66c46ithAv{a}P^Iz^#B68U?6RpOqqc!xb?7K9VdlP)?o
z*eZQ1$;W%S%5T=dp37t5lc}8Tc&nL%#&N3R#;o%jUL6Eit&Ri9@H-Q~^!aC!{X8zJ
zfn14^lo-5#Q6XVMRIgm6_exZ%f>W*JbkYoxVt4&2F8!53dwAR-<K+6;Fh%<F61EpY
zQGEieI9_9s1^91h@&^{2RcgdtjZW90v(SO~E{M>%3yMOvPf#O?_=H;|lXdmPp(Oo_
z28^*(5JLPS^#<K-;477|-Qjbz=ZXuw{q>wKEhpKAt#c}I@SmA~`)am#-=3E>SHn1I
zWq)05K^=_lJa#&s8vV-T;ybZM_^<Lo9PWaFEE8sP9NqllK<9giIprCmxJLj}<U`7K
z1I!xQqqYANOwGfELEn>z@Hum|gnug?QO1_I`+v0npNwOYi$hb6Ec>K5=ed63vcN``
z#e=RqC@V7Y(QPoEf=q*`L>yD40b3itYnEP>hw5mg=K0>@^qXUr{p&9OQ*#O>yzJv`
zY~zu1*jf}N4plQS(w>&M2N3N@3OTJ0#w|U2r@%lW!%>3uuk;knRP&zpI5Xt2;XcR-
z>cNs>6^KtWgpgit)h~-iTQrz3o}jZd&B+|5ggQa?KBvD;k(nkMF?u3e7t;8`LWY!(
zjep%rO%*Q|qXckIm~zhtHBWFUK6g;%lw)M6b${&j(Af!xvt#7vIEEBemd%if(aF(&
zDh$64nY8hYu#Y~DT0QmEiEEe_X`M0mjVmw<4fK8(<>Of6M@@L}{abTdsxwKi-Pz8*
z{6NC&QDvWmeZlujI0Ryaue1D8R$Do)CHDQqS|;hP6h{^?9YuF=qn1FaXpk6uhQVGy
zP{i6-_Cgdaf(~UkiLQkfYiM`*Q`B6099^b@coy6iJm69Mc--LkPpo#6&ZNP>+K*Zt
zBHfxyM&@5U=3!VIEiSp${{7_3bMno8Q7$eb!AV=xiengDOg)|U%Nm*K<cU4hKa}b9
z49Tr+;Jj-<U>7YkWEHk(0d0G0ksU5YqaguxI9?x`5~LH=*_p~801i=FQu?UTm*^x|
z=mbt<n%NH=`mcysGNl2D;Jy)B`lHnLDDXag<!pNH3Lo#j`ybfpt8eDT3cRp1wBl61
zM;`hMIh<qodbEX>#`AB`G8M-0M}}Ay#dt>MsdoCrBclK0;)t^DDPo-&kO5d=Sci--
z<rAJnsV1WkoScQQR+v`bJlC+1uEm`w0p};o%l*ODX1eX>HobtR7v<fschwK)<{g}*
z%jn3EJs5F?NU<4h@9iF)2gI)n2bvdinWxQrO@))##V#0r)^hsajEvCV##TRkgPJGa
zxUIFcXzv4U+=SC1x1_wT>&s7fD%!ld|HpK7{S}zamgE?8#kjgQ7cHUv|2eKQQ!r;9
ziufY_2+B$k&<YLp-r)n`Xbc@|-|o`abpx(_If0M^n1J&|HV|{+TZs_Zb!yE%mMiAa
zwDaPs{gd=gegh|7pQ|ml{j8L22<cviPh7bc$wzy!9OU4NQ;;RXYojJ=u+?&pG8yaD
z!*7T6iBVKgwUk%7zqL|=%TI}txM1SRAKO^OPA)!}44gmu#NbT~_LY}_yutL7XL(Su
zwX-cur&nERiUh(Kf8!kQMfMra59HT_w+-OXAz~_B>0{*uqJvZrVEGFQ6G;AE;7bCZ
z>h#&?e)6ywDK%LSU;vWZFVYwP+3M6O0aQ&2paM5P03goBf$Z#T<0|6Y7={qed=M<1
zP4mOo4z(m|w(qoFTGetm*ufzs+sb6Zqy+AN6v4-+xD-M&^VNF8A8V~DG`gs(AEX>K
z9z?|*M>zLrgzi6IP?edUnC<4qw6cxKEqM98wSM+EFniwQK7f<l(*24&r1|{WY4H?{
z_8P@W8(eNBB~u>XS7CTc9@X=!+J#NJZA20Lx0*j8fA;#0kaBcCca8sRg|m5iEy8%{
zkH0>}cpnmX*!+iHoSzUu@{b*|1%4sQ;a7lh8=%)$iDC(W*8eFP1t&tjwv0d^>w9L|
zv3(Mt;4rR%Zq-@hTi^<AN$ZVL=(xOW{MT#oMp9F_(L*tztGBQK%vgGOwf+lbZXABP
z0D@MQ_Q<b(%-ShyE?X);B4Nds+%G;N;ZVZ*jf=R>ddSx&$P~FtGHB-Dc2bg&J?91Z
zwVRS*ztf>rqZ)53t1kg`#m`bTS8|QKn;tE5sl{JdZmQGW(PLR1`AJuCSICG6<0hcu
z$#p|3tM-np{#Ne$nUD+clHKg0(5J-L$i6~ucBSF$dZ;hnrmdgl7vvST=!@Q8Qdj22
zRrD@$6{KNNx+gB5jrP(GC(57^CD4x4w2^;rYe^|v8p=SOPAqNZTU%D@_~V#hrQBjF
zX;J@m9mllAG3_=5TB$?l|0fKy$$pNAkKM<@hbgrRUyHq{|Ca`i=k}sGR}~Wk>-e|2
zvUNrrA*98*T3K2cN|y?1=65^Xw#TB~wv&@<bDq}-x!J9>la;O93jw5)Tj`qo#3L=P
z<7PQkl4%3|pDy5rjSILTuNk#WO7Zm2uV0JWI@J9g$SshpDmXm>d9JU9L+z78Rt0V3
zz>DwsYO*qXY#qkG(N%Y(dK26-zA>q+n`__q5EbaPP|`Q8RNP#r8-~^9AN5yPdOhD_
z71ge6B&aK$w%<PsPw&;tDq8b6>De7$Sr+|T0jhBm-5G~9{XQ+`{&P%1Q;J49?Of_I
z-Lw2#W?r1rywitHM(W@&Ar16Y48K5uJk=fsf=eoEM6p0^#F~6528g++>g`57&Ot}T
za(4XxOa93Yj?k#ALr(vX@H5E5LKnbHl<rI}+ym3;FiJoa3J9Gx*;*HUtJnrvPHpU;
z-#PUTPklYL3^r9|mh@A<IjXVeUSz1iMZt=Hchkhi8`hij<2ItQ9&kP;N@`mTU(uVF
zqTz9cyu{s7<^UtfV03H7Jy+gsJcjp8K);5Qb#&U*kkxf!{dO?zaxn^n)j;H)kM|dE
zCStdUCQEF3OMpbxX?f+QLvQ!uM7r(S$(nK6as;d7azC@pbh*_SLTZW>+KGV+w^o1Y
zHbgYyh&DyXp^Zox0HPn906X@`ikimt$yQr#;jzDI?*oH-yVhQItN0KWtD{brM_Z86
zaYzTo9V=^@cM*wf?cou2#9f>+vN1Ubh+&+faZRPqd?zuYz)E$_sC##lp32j7@R?rL
zP>DbI%Fttf6Vh#-wc`LkxqfG8Wd%pN5Da74vW>4KS7UVqJ}&n&vAf~c!zJ`05Fd+h
zeDOAdd-Wzm>&93T4<H}aoQ=DfiU8X0D$qJ*n(C<z_*kqHq-M(oW5oBPY7G!uJvvsF
z@t6NXjlAz4-#+bKbn;1mlk9zL|H+T`bx&NT_xId4bWH1$ENLA*KRb(YCp#*F`5X$U
z%mS*uPbg<C*eH&U|HuHzx?+bG3ZD(NPf{q2shh3h$y~oTz)q*l^CUWm1P)x}zs0I>
zmRv<mN2C>?BaL-xuzn*2=NQy>9`rwafc?UKgy8Suq=Qr}FyR9Ux29~GSYFJiW0KU=
zMkfEIyQ5Z0rRtICMEM<iktqN7$(UUQ=??G<%a{*jP>NDLkX~@s9SYhy-eZx+KDOvA
z|36YUjr_sT)w<y`MMg;F?Cv~|i#E{MY2+gkPXI6CR>xj_#*>VD>;vDYTvgU%cw8<i
z1f#@TVMrV7*O-ckIH$vmF?PH%1XLM0`Jx<q;tM(!h97Co!!gw9$Rx-baF}^kkDb|w
z;t*@!NJ>w25%S%SG}p2jD=iN$qP28~%2s-{YQX3Vr<26z%WWURP=yF)e(N=1vVAqi
zi270^QGB=-y$H@{4f$K=ti<>fy>js`mMY9dolZwmV*f}3AOnFAEKkjxRNXZV^Oq-v
zU^-p?$aIhc68B4Mfu~2V2f+WK&BkU^e;$6pSNIUp^If|$fIM#+4~H%jG^oMic3GC(
zzyOWWPj@RE<;Z7*VNsUwrGjM3YeT7kwy@LDhuG%B%UPEBx2E_$^i={bSk#JnK~JKr
z^i&o5g=&1LS@v@}%N&+D#F<1Uf(zN9-c2KUeYlnT61Sf_H@sa4Ep@sjybym7F&b|T
z>?Z=azLjotmw_bEsl6kI%LF=euQ_T;=;re@bU}|0Pu@tt%d1+UTS)H)^T}sQ^}%9w
zM$Pb{YsMgF{MN~z-sjcPKe+ptoAe3CFeW+x`Px#~gh(0H6#FH*80qa`ieR;&biznQ
zRuUV;tHg?Qed9liQ5u{`9H#Sp!~LX3D-Qf)4k=4inX2$V#YM=1I+w4Np_W^UtH;&t
zR)p0cXRzGfZ{YbZ3qSn8K!!C#2%vA8ji)>$mXz;3rwnx&47S{nAS$^6aM_duzE#xM
zV|DdA&?luIC%MspFVywU$fVHo{~TSUzZQW{Qv8%!5kN>ak|ZF?^F1JmNtG`hApEEY
zxgeziLa`qtZ_=MAHv}5W#)_$i)<3AWnd8^F-vSdWHjKu7`7ONj*@8^!<L8f$kS1md
zs6%Wnrqp>VHFn0rg+^9Ci7Xnt8T#|CsmEL0>9*o#u&a8LFp_{t<;BAJf<8q98jc$B
zS54$hLoP*I3H|96S??M+zrX(W^%Qlx6(v29i2GZ0hKXjN8MUfPK}jHNlKv&^7B!)#
z?>N%fMyA>2S+Xk-9h{~CtZ`Oo%p7x25LiFana4cP<g4;4obeF!c45E(FdjnAm*Zn$
z#!Tdd8qvoef^SLe8ynL0(pxPc6d8DpCXSJpGZM%wWsX^@4&Td4d+{RB>*miL#_>JS
zXg)%5o2V4ZW8n7@-1nDTXohaTt5zQi5z3j36a5;3Z`}XFay~YNSra5w26-8f`eNxB
zT=e)A5bs*veWy3tK=|11fackkJnq86dv~TcK1sqj3fzl_7(%WD3BhL_<eK9%?r-<C
zFXV;-#bshmiqtXw?kV5rkr`nz^nE4QB@P9z4H$+X@V9ht2;g;=3{n0?Dq|sAqxr65
zAz6vj(eqUFVajPB#FRtc^i3c#;i`mv>c|E9Fx&+wKtdv~Bz|9<d!Bc3?a!tv)hEuk
zW0J(Q?ub}~T2wkyH>D8-1%~}AQ)gyQ<Dh*gPABMo>%6MUzl-4od>vgWk<epiz?GiK
zs|!cVQ94x;epNq14{caVb#Hg&ROL+wqZD$4!KeeqWo=zml=35#25!5`tHjv+RQfl6
z7bvew0j^ZjkI~h+Xbe<i&vT>VRXCtVIQHLp4b;1l;sjlPe;cp&7xJUYz+zvi7Na-`
zYt7T!1=&Du6MJ&vBr8(gcFpmZn3X8jUdiV1w1ljG@|V0hYHT|{vmHJ;uf8(fe4_ib
zNjSv*hwi&yzYq>KofQKY5a4rvt+fQdYxn80FI^v^F$+)dbZ7lXx#p^6@;X9aWRVY2
zD&52#Ng-H~435$w)b*~}!{Q#JH5AXkIzaJVCv^*Dfy2US24bWZVYr2|q$lMAoZ-vW
z98lwxu4{C%9;GrML#)72?nq@ccvQMS&SSbjrnGLhUKni_Ck#yUqDRv&#y?Ibx-gaJ
zh0~W6n}E^Fj?Q!pDHn`va0vEH83Nx>665&^j@I_TkL$I37U~W>KRaS+#qQ1^DgYf6
zuIBia^|Zcz2s597g1CE(7+Um7;9pTCEDRHg=#JBshJyJQL(6E|Q$!Fbt8I&cVyPBc
zetmzxu6+1sXt^7wOLJ=PNL3p{=dj15wg<VUw_l~3Q&vS*91+O)9Su!)u5ugYw3gU+
zLz<z#DWh3WF0Yg^L(ByX-|!sL^^0TSuE~VkWct|tp{|c)f!)DgToEwHn!g2%-X^b`
z13h1aDW`*idIeQvTm0WZh!c4HJEnMRG!3?)LILa9S1lwwhTi+5hv2sgp7=eOhw%Qu
zK>P@Ydo|S=F=|V~rH86?u2c@bFOl-ZNLYd_W~;wTcl3?PxMj^4tp2Cfj>1AsaTKcZ
zLQqvd&tFB-bD|~4M-HqZIW0m`rt%z`FLjnW)O*XB5e8jsDL;!f6krcTFEa-Tzl=-6
zJg+q0SWG`z{zj|TG#Z8hGT;X;6j7paTshj~KqPB;R!7C9;=_qDD9+GO*~tqe)b_7@
zA1>{Z<;n5qre{sd*6$ypIu(Bmq(JW~8&xK9*YcF!FQ==lIpH!ltxdqQM3No7s<B(1
z4w$9pow*P%5c6~=>g-=8o%8~xM3uk?>R+PGQa!DKhW=yLE3y?L5ja#8aH>h7pfjV3
z67ITU)BFhxwTQ_*V9n9~em`d{vBg49AY0CVdeRu|+}YA0{K3uRQ}$w}hpI(Pe7e4E
zQ}b&r!Z7XVK3x&AupJVifOKyD2(M-iSf-ku;TG-3nngifZMh0~bdAPy_`t8=kLehJ
z+)It%pBDw`H<7#POoBO0l%gcg6Z^2!eo?nf-VkHm|NLc_NCalJ3GTQ5JR@NsSNG7_
z&ZNnh{nEjdTetamuHh<=rGV5EIA!K~Kl&pkH&t7wEOUs+@%wJvEnqrz#+znBiDyNg
z(hRMLb~23_C?-foV)4m(F|AQgd@leb8t&<4Wbg=2?*k<MfglRr$b{GXw?x=%7w9&w
zdC80A3|pW6l}?zaW<R#a8UPZuO{PL(1a?*H-L`;ZI758_5G&~YFlG94mgZ@t^%W@U
zDFrk|+dW8V%bi9p-uB<UxU;-o_1dSsCphW;b8GflYqx44+?<9lt-sYSVv8ubzmNIU
z>8og#JEaRB0c+?F*UA}Lj)Jtda}(*mOWo1(XHRxf86%%JSSZJ-pfXg48UL<ocbro@
zE}Sz)xIUt_{l?ewW{phx&u5QGb`F-QFE$CWt~8K6T~3rVF+ofJ*w0edC7f7H1pKp!
z{G5U#C*MZ}><$_mK8yij@tt#a;F!0$0d2WL!gf7B)x?D?lT(=P>Ol^@)vSZhJJK9B
z-cysD`hw|j)^s`%Tsu0IOe{L{NraW%WlU<jB&-ldSSH6irZ3{|kld?Ab25gk%>UHj
zsMjgGQf}-)&$C!@(uOq_(DW}9Pl|{>)^RIgWqvZuM>9SCC&2S2yxRPsuv<8+&Dr(#
z$W4n2g7Zd0c|=-QCAthxd%HMRufz_Ee9RzTFA@iv3mc+rPo<cdJ<*)Xsze+-{b-%<
z-u81RFZg1~<*;?Jr{M7Np=VLjMP}ff_jY9ubQvtiAW1BGa<B5xfUueUnNOSXUna8J
z?1fE{KBn<I*bjq6EvSPVrOZu>Cfs6RiCu+Q-DKrJR#;Q2$)vDV1N*~-@eH)wf%C*$
z*Em9n))(H{@z@?2s&r{no@q-St?L@RV)Zu#rp8cnke6GQY}pgy>(n!eG;U@2^}uOp
zq_U;2g8W<VW`7%SAQ*Oi(YR3rJKP?2+rD7?oWIAMTi^r#Lh^Q5zuaHT>yfr>xBRA!
zKNS?vH~*N-p-PCph*^17b>&hm=LigYlg5pZH-T~_wo~XA1~Kqdtk~CGv%W`+a@?aZ
zbP7S$G>!Qi`v38eDwGN=b#@5UfXd%vguQ=e#q3A1u2WP?`J()7$ki#-SZ&AGs#h!O
zWlYDiX*&5Oh`7{@h|c0n%i@x`VVRHwm(-#<aTc5XOVyszMW|=LF;vY)!Bgo(UX(8U
zY)Khjnn_;{;CsdzN%P}9H{5*p$X6V%S+z2kiwvT%4>of|^WPiYI4E&=vcft*3MG5L
zVY>?27TjTWY9$7cNg;CM&^heH)jWWL3(wu=ftR8bRYQtN?-8<IyV$o96mi<Kub^tg
zU{v#9>hhQW{xL_ezD1<Om-~2}+ag#!hoy8h9QgJ})_w46{Nbc|+f}jj^B_{pe66cY
zQe|$t-rNNG{LQi~D0_G&;<aUbwdu5aHR@wR4t7~G>tu8N$YUixoe39xxjG%T(0q`~
zmDT4^6?>UYzf2<<aI0v&cPnhc%%aufq1Q;q6v}axANiW&lGn*X0au4m!_Y;2krB53
zu$x(U&ICId=<b4C%No%$g%F1QBa%nj#n?hFpr&yB%pWG+I#@?+%p5sXtSM+?0b%X)
zD)k~Y#`Bd!;QoL6%F#GOn(11w61Auin#Aj|@D2Zr>|!ihm0U2gO5!lKVvpbWg|Vkn
zt9>{)f6n~5{rn?Nq^_@V=Re%+VS@sM|I&OSL_1cjzOgbHwa0ovk#c{7+2kPuRPFBb
zCE*HkQ@a_a@hX!YR^T}!K>1)S#S1|?x-oPzpH)j0W6`M~S;dn|sU0=$p}}HxnlmcM
z9c#*b@7>TenRJ#pi-2sXfCjMqp*!Bj8E*RDjrn5p($S=r%5gZ_+Uq%6knwC{<rC@`
zoKZSJcayG3qPi>|Co)f<K@KlH<M4f!mH!7?VnCw~`P|osHo8%>uvE-{uhqXNZH%cl
zt!2s=i}<+Bn!J6~Nb0Kr`MD@#tH2%e@L$Od7zBAa&y@(ymk#k5^aGec5%KIp+nHaK
z_T+LXK;q@T)09ypLY)w%;t8MIdue7|?iJ})?=X=aYfkFWWlqYuht4jcqT$tXIIvoq
zUGtHo>(`zGs84r>iMxUa&a7TMcVh%glLm#oi6>?K*={D44kE#gr%e-tWTq@T93a^4
zFXfSFm5zo6$h<w)@Df)8UmGP~kp|}YoA#G%Fz@hjngid%uOfvZ?z~4)V3@PD6I87-
z(Bh;pNIero!jLZJT&-J*XdJAVZdmwa)LLnQoUi9sRD<wyi*=+V-1z^>x5+W#1R}^m
z=kOMm1UA@|x4Ge!L3*jufT&xe6r{1YX0G(_M@(~WQ@#I8U|}KNqQ2f|?`#5x$xX8|
zv7LXiYS39!X13LtBXe1en-JK{DK}jg&2q6tDH}^;Nu}ORn-z+q8kKYK_&2~;`B`}u
zZ@zC}hLPQuSBx;s3vg?*GQm9WoglOuOt6yn=^%!L?H<9JD`I)%pu4WumlDM^Q2vUQ
zwt$yah4{l^OIY6X8~38ogP}h%my0IzZ@v!P8+!L>y8IXNtCu4kjZZ>S(+QWmz|>41
zGH0<eYQ|!*tiY~)DYuqP%D+GEQ!U&4F4B$Nt8PLjD6R@m=70|k<xTfDG!8Upy14JJ
z_t466`G@SQ0^@q8%K6~zW3~sU>WXS`t#rHqrbYE&jmI<u#DXjzVCbt~S!m1_^N&@2
z)Qr;5>eXO_$y%Icrx49>@s0M&i^4LE%E2TSENf+EAnv?|jZQXLX2)rIf{Fid@LK|}
zp-<%MrSA+wRd;RT)GjnG%F;oXU;n<KD}3m=y$ha=<5uC>H5eKcVa-p!D(SRpbvW>b
z`GPtebHzT@nN}cXu*J*iOuH^`41+|Hm<<ZvojXnWaOk<M&o28E9@ROmR5z5CCX0Kj
zR8B9|SIGR0OfGw6qo2e$Kegof??e<9E+TlGyZN;|9mL%f)SUtT`QY2Mg!|S_pd)-?
zl!H;@p=$?6zvfwMK}->RD)&L$F(*SPo>@c3P3HrsLwEWI!GS5xVBzdafU8yWW@O=J
zXdy7}^$1(v&r2Dp>@|HQOEDR2ZFJ1?9uc=%u<fc6#SHV7_SZWScJO$h0k2(TA9j2Z
zNQY<W8N5>9`@A5p7o#XA_kCVV&w6Gy_kla0;I=6bUNozj$Exq)8UT`R0$v(u!|s&2
zN7*ieJy<18-d{kFJD|qaH9~|D7)5K!A;X2UJ^wiC$_irDGWSW>eK)&nm=20l`^7!0
z^xf8dudH!1Stq<>K)vX*SE#T`78-|}dTJGx7h@%=X}-C!T8Ya2UxV5|r>3I*nNy_(
z4bh^;t=s%;>NE5)QlqT?$HfUvrsL%AET@yYYor!l4TVOq1g1DQRx~gm7V{I!EA;cu
zBs*FY?3V`0QCfXO0}b#@Hr{UGkR4vH#=Yw6sUFq~q=c|wfWBNk&o-;b<T)2U*jT-P
zx#|p>ak*Ix{ZED8`Dy<dYMDXRoXkPFSCrYgWs39ZuR-N|7#!x#5}zw70%Y7OCSI+f
z<rX=%FlYC8I6+9PdfS;tL4XaH8zVSe%1JX^qlDWr9>@95g+k$Ryy7f1bY2t(T9#h8
zPnb7CeETT*Tw*T&1^e-b|Dft2DvsK2=kCWY^RLs1PG)q}&lAE!QbCYAWv)BPtjmuk
zS3JBE<VV4^f|p6Al=RQbbk_Dw?d)k);KE0}0yhUoWPuN#*eui01n@!clu=Zynqnqy
zhS<M`XFK1dcW2+O9|{o$&1`#F<sSBsHEb(w8!o<9<4`<%V`w!N(dd6KdRFV^^562o
zczbe_!GEDAkUz**p=|fxx)bxn?$tuk-ZH|5JF_v$>8mWZa16LD6zh25r2=a#x$_sH
zPK(D`IFpN3$|);0+Mza+4%K1DHz=Xr4HAC^`sQh4Cs%OOE$IWkk|n^*t!IG(p@Zn$
zVtL@Dy4oGPtlg4Czt|!npB1yGcHXh4HsdD&!T9ALPFmHa8ORpqU{#%mUv$>8?w{v9
zOXLe(D5tMR^Z8qYmi4$XNO0jdTG`np$p!`{o=V#5oL7fg`%>=+sMWrAU^emOUzdZL
zuFmRee#RY~b{z*>-EORtrg#$GWSTz?l-rlztm0Xe-usnW1PA`OKa7l**(p`8;`8Qm
zALWS35~rNJue&%8Hpo`CsE$T(r7-tSN6L%IkqT2420}^y+`iDoK=V|32G5|G`b^8_
z3{(Pp6%WpE7WXXGB7ri)72GgpSdVWjZUIb%pN;F`UAiilUbSyg={q(*gJuP|8O~wo
z|HIy!heO%_f5RmeX|bkF#Mnh8)X183Y#CejqU<Wm5JHs7zLRAr491e|J1rtaVeDJk
zjis_?f8KM&^}Vj^cis1MAJ1_-$8q1!^ZUmkoipcoe%AN<^?AKtuWp!QOup59KFC#P
zHAeJzjl@3VXCfHSNJ2{ivUJLm%Any*ZTouk;7A0I?dtferN`(an)xgtNR(jL$I2@<
z4qMgS*E8CFJfiaIG=AF{hIEZ79jHIiB#WV6h`!T~x#p#`fw?aKmEU12tvV>k&SCIF
z`_udNKK-5(s+bmIM`nztSM_@wqXKb4pm@BDEeO+~B*5vWDUwtW7071eupt1k;dZY?
z3!Fph86>KO4frLsaO>?C#jj_t9ZWi_CMg>LYDve1{gzsOPLv01x%wz1G%Uo;R{A|B
zeplT?MXp!2yz;JdHu&`FU;|76PQzFO9U8LIb>*y&6@D9fRly8vbZ=H2WT7c}ThE{3
zv_hv<``^F(VZ+DqwWyTr2dPN2bDd06<+j=F6$DE3;fSfh_mqcxe5W2i->2G|zWFxl
zRnYq{?$$0Zlj+b13_XGULjMjIz)9khBJ0MYT~VgQz3jEH@<6xJ%Xv~lqU*jcRs1#Q
z<8mgc>^Pp5?8mDbCdAzhcHeaKC8&$-#a&5HAIafQG4kg6r7PQ-b5TO}w9snKtn{@I
zjm5KN_@GDOlA>j<$E*&t>r@`}X;mWf*|(rjyG!1*+jD)-neie}p8lpB^tx$r`14Ni
z@o|(YAnAILy#TSd-Ka^P97V<}9a_Y?TUK*c&v}u-s^!)@l!<wKW~oMuLYi_d?1`(x
zKAT^!b#Ut`U>m5%23jdb<^)3x6%uz1*0q{{h@?(G2&=&D35JzQ5NSB@p_8U8?4s58
zPtJJFSqsDVT?$!vI+=y^UFkLcRs%Lfa)mDusSFC(<V430NW%jESSz{;;qPjh`nI>3
z_lEa{`Jc*^druu*#Ilkck<Nma-+Iuw8#8^GQ7EO1Dq93^RaZi1hS29>#b~xk$=4I<
zPZoZ4Jn0*ISoPxMLPU|q)&`bny5KN%Fo^T;*ema3KV_cG+rt#S23wi$cADVpUXIS1
z;{y8RA;@l4MG0T>U&;Yv^KX`OEoQ0m{g~lx)|W1t^ZLaS)M*+d48aqIpCQ9%?YsFM
zjrSborgt!iJC<nkPhMLc5uwU0#zzw!<v(N?sjy(Y!%e-t!;!8|?#?^`GND1WhGjM}
zYB=N<Q16O}uBlGnS68;rog6LCsfK@OuM}OhSX^$*eG~P$^?V^iC^FeVEZ)|DDd1~*
zk16(}I#0L9ESFLH9=*2qHp-xyb_t~mh4!XOrwxDBex98eBBoZq5y5muN6LkgCdtyZ
zO%dvyY4VWqIVse%fq?0jKVpLgleBg4hdbY@Tg|~?sa<%wVE0drJV_jwC$k#8OPM+!
zR!ixwjg_umb}qL##^fMc*Z!JYg{UlUBlcM}t&+=7Nj6f8%ORcaBI<6~*U&7>!Pl`u
z&O>sYwiAeES<#M*lnLWiM$@A$!m}yChNsufF5{Ewq#OyaRK-iVU#1471RdbZ^ek*k
z*l1zAbvUJ&mw!WvvpgVXEtA`dD;IyxULd***Jg&e<!IGt5zVudqMz20e(^lE0C(b;
z9|s!IjOHwJOq3qd;=9Zhk)?Ol^hn0As)O#d+VFvS=vj#u4}3og+0`}*LC?MN6*MZM
zV8P_chQYSg;<*pPjtw!BCqG#r?5GnTyUhTD6oo4r^bPx3a-HGA>|9@P>WTfdr{ntO
zapR6j{?|$y+MioaTM<v?yI?eK*72y`<8y>qj~D5;=;Unw)Q@Ao%HD8hJB)K#xg&q!
z`KGDsf{fY8OPQh{MtF3r<y*w+XPTB4`+UzUO;3++Mam)3*INX`WWS#>9E^XITl?yU
zJ<G$0ceq#vwUT7#9J>n&G*^68gd8==b>&B`HkX+-JDu)LY3ol1O`wU9L34*vf>dW(
z&sQ36mJynrq=^DLoto}SeE5k;TNUdyJyDck`Fm^@LkCS!sfXkWZK(B7K(sYvA%E?E
z1npPB^Rb`z4Av!;3Vh=6fPw$Bw=F)K%L*+|rIUh6F4URl)Wzw4)L7>7K_$tm(3~S)
z4ZRq~jC5U!8;pK+kIYv!S?@E>WRjfE7ptlAZbDP%v)O*<R&C2p#9-RWl`(Y6zRC@M
zf!FaFld&%(D9`xvDaO7~B=W5~#9&v}gB!E6v!1ej`hPY|xiEQqlPoKRP${ZOeDx`w
zOuFI&Ofj!aKekzh$)fg@*D%#3SW=|~wPv*1-aLIXZuV3jhRK+fGy;-PFxcC%H;qPN
zCPc_T8#I@G__;<o-N*CDVBI_OPZZSDyHgH-mZ~Km;=|Bqx3$Q29dtLp*+V0Y^U`dH
zV0A_Lm(}0j(@>%=8y;N}aUN&l)8Bgeo$&N!#PXjj>i_C;l252VF0=1|4{T=AZ&7J9
ztZ%549}7RiYd!<5LFtl`ODbd!I8rHj>Y{(T^VPjM{>p-q1>@&5#aj*H38TkbuU;bm
zMN4y7$2IWqZlfDX`HY`ZOrR$jhCj4V(m)4`sg_pxeK#%TXukge-diI}G|>kG#kadc
z`K8aiiGFtNb*trLiXUWjFPm=gFt9!(;HmlfRl|2B`dWH7{Ji~ux$*qHC%k$vc9Qy6
z8w5__HC&!7P#@l1CG+@wq*3x!SX3sa_X^^V!8H4D-wxvn>!xzB#;cs6wd<s5mrE;2
zq@QXwzX?)+e3B{t*GRwueU{^iGh`KSWGn=qqkLdu@;vtUZ$rE>H;H{=|KRvwo&CZ1
zZ+=nG0v5$pb5!y~&4e>zkFP!yp!s$W*EvBAjf-@9tIQ(@FLd6>0DF43I9=Ot#8l=l
z@hBMTI$6*HAIj!+uY*8874;-5J=p_aioLv8xo5is&e8N7Ohuo87dr55Xy_v4kC^8i
z>8Ffqmvc|;F)vACe@Ni&$<A4a%HBwGg(KgnzbeUGGME%XlHd38Z8ka=?gxWLGk{Si
zO&$i#!lltwo~q!{*h|9<ywC!<vk#7TfD#{>Oc}-htV>swQ)I&>tFIR4Rh=#J8yk7f
zKyD%pqcS}3Q{9CS<jRZY=aT@*q+Ip2J`e-V5aOIuG9Coh$s6X2)DL%&vxK<A3rq{1
zjXfQTl+AD7TYN1=^{wr3x>qdl6nnkwTy8<DvgR`=larCI|Np|5BQUQ#ep+XuE9JzW
zm@896pl;OX_)ibH6CECNd{(f5X<<@cm}`6t&AFuiiR{d--4v26bubVv?>n9LjiXxR
z^T00?sraaf;zB;7imp2)-{i_ClBni|pOze|Xsn&;x~ZSTSbRJ8G{t=~1*X%FrAZ@!
zfHfWXqrIuMpuN*)56<)OkV$N<=L=bUd2^IcGK`_UHvc-sEd_`V&s@6GC<?7VQ2dZm
zau+na<R|C@lcJY=Y3JbK4mb=x7b2gNl|(<?=OxYldb*a43Pb#K@{j&up}q{?|IwC`
zjz<oDG3mo}9E9g5eG7Cp=~Z6X!>dFN*MSb|0l10wxmL>G|FLt8R}xHK?u;3;Ty=PI
zUt;q)fxmshF0vNV-)9HT<+GBmOlmU2|M~5|za&ovUV}ZvagSF63_i^#r<bL7zWw*Z
zaH+z>4fQ!|Go6QDp4n2gd2K)5_I1~G!Bg&it*Cr{6Mp$QXsS)|_t#+Qg}--mpggHR
zKHP=o++)#4_V){3d?c-J>$`8Pw2(IETJdvK|9A}_cuEb0@(%{4N-$%3vv1%0$7@If
zm_T{xEYnpM(j-~XwPXK#OYNpuhflr_-{nOCuPJkb?m5}tFYqFRZ{U7?$h7PyAk@C&
zRe3!Bc!ry>sy$YWt~wLK0i6;1qU7Lzdg^ymprG&XA7i234Ua%!L#DCQk2?)@gVaz(
zH<d*aNDbAeqrLa<?ZBc8A3QhNDni~0&!9+0A1(FwzIZ4}3A@yht2l}WAXQWZMjrYg
z|Had;7Jp|GH2tPQvY|jO@I*I?H#?%EH3VsSbBOgHZ$YiVLVX-h642}d?Yf&#09g3w
zi1__dmDr1xVN5O~q1>K26Xh$QLDdbq;T9ltD1R$z4(tiKp(Nf$|4u0`8-y%z%~Q*R
zp39wmF5_5_&tzWbZMx2h0vkQL{BC#N?V_KxCwp~_-EVgqI0PDk@TX<=GAIfb7k211
z28Ra<n|(Sa`m=^^25#_LsN;I_JHr8|UfT|w65|@ElVnLf+Q+`EmMGqT_|n&1nfk@A
zDnaaGQ+KvAC$aGEpm_HAe}A!ONd(H8CQyIBc~-CB`knC89=~pi64R4SxG_*Ne98@2
zc{ZSc8tQV6!9D-&zLQ=3&Vw_16)b0ZtaBDSKP8CuQeh>DB<mKE5Tk%|+{h=AM7F0B
zXgc-EG8#s7xU!SH1DFMZ@>*s~$4`q+ri3d?XMiRh!|Ys#{@ulQM34t4Y7*}KG$^pU
z2-F$$15XIq)CL?cTI|q|t0gI;%zy<*A$%fI^e{TjK*oSH&HvWNisY<x_(ue8K0F(D
z5?2DWMK|aU%ra-zXtK$U5OR#GN9@9DLdpH&2rgjRkqy$L9)<!{zmjZ=`<*tcd1);w
z7d!I3_U!LDZXT~byZ^|)-^MNk>33SaEG$W%el*W<63twnT#%rpW7tljGHM)92Pslp
z5Ft+e7;YMLxpv>dV_!W}QwOJ3e-796f@tZ8@u#(=8EY^mDc#xtGYT^pKDox88}0Pi
zpP;qg1(K^~B*o0Fm{qNdN^o}Xej<2Pn~7l&e8P3jncP6YINNh;bLq>~Bi+EVnp(qN
zxb0_%Y1aY{ZE!aTn%=tl^l%~6dk)a|lLQ_0!53pa_D<>O(KWEPu>mopLXd5KMO~@c
z5ngfQE!k9iX0#a4=Z*NM3p<UB(n;PTn?;A2kX_&xV+lf`Mv5Y<y5Iog-g7NZ*j71I
z$WLanuhgL-{Y29u+_DhF8>0zr0N+Z0Wk)`|S2s!1bkH-zwGcSmE_XmV*y1#(F)q-Y
z^BX`{k7j3ARH_9L)N2CAO20ihx$53Fuc56JKnU%k)h`66n+~TQ%e2uuVv7WgnLv?&
z*U(LoYj2@n8EF{*-r=xk(0X|X5G=tKCC4%;odWkR;MC<-B?{bxBau*lSYrL2z{QAt
zY-i@6wB9xJOOdU<fE`)xPiTi&qY8Fa&HZxt3nX<AyeDM{6qVN*yY<=CxvV5DoC(Z~
zc2*=A9<6w4w_O2$hZ%O&;yeB8i(Zln0eb3Xtt4fTsflz?-xSa#?R^Eh3!>17DniDH
z<0L}@i6^Zs2w@K8=U;Ctxu|$sLpv^6tJ4RfJ)ayrZsNy9iGD&a#d=iF;mblk_$j;v
ztx(g9lgY6nvE$&$gQoQjyHw;MAjMWB6;23MZZ%Sf<+hgvv3jF?n0oy=Qp2gdn3|!i
z{N}vuqMimSuJ$zUySAe<?n#7|>HN>@J@j^n-JJVI3nhspA?1X?#~|D*W=^YJKl9||
zogecZm*Q;l6k`n8Fb!;<zyGoM|79w%#7Mpc8Q87$Hu+jElw8`O?8>DcBWbQx_WQYN
zd}h45cAyy>dw^~WG<wC+?725`5K2JNo(6f|#m>EG`BrK5Q&wSOCv@y(IK>sI6)(nv
zONdc?`sApQ^f1XBL5HY7#@+z{P%Qn%%V(#BvQ=6Yhw-21Vr3$eZqu?~dtD?c9LbJB
zM7QGc$?rY_!Ogz@ijaUJlPtgDJFa)*AjS1MP`Kx#{Hg+r`{v5Hcm}5-K3Sa3nwXMb
zG`D25cvn+H{zCu*I5!ISJ|_=k&1|*wA}Ce}+ilT#@}KF<yRF3R#eP#q`zpkYiPnEa
z41az9P{~`63We$w-<dIf&43H*qQ@t3MOwC{>ducfRT`+@)RV0wSD`tzT8egDFv)?#
zB#?$ny{7p@wKMD>nml+!-lRWTbhhOE;VVBR6ZzTk$rA^#4Gb3-a|&`m2e5NX;lsno
zyaj@W+R7RsA}l5SSfHU1xK9(*W1C5`t(`~d8MG5)=i%D%w&xAjB*}QS?_nR+x1`!!
z5t@!D4PnS=Q_v}XtYO|q6_{jyPSA5v3d=H05_o6oq1<A;ZSX=Qaxx31nodJPis~1j
z$2cWgMc;uGPbM+Rt<3rR?6R@Y%ZpAX;gWqO{%5bSQ5NRKwss+27u>!0k98HM8Iqip
z4dYGs7+tyR2-sW6Kl~Jbxh+jx@zp8&k7pWjM-;tn$|h6U(ej~3d^yi?&&lQ~Sn8sj
z(e9@deMcw`RX*m8MA6Eaa)02ls2|=3C%zZohK_(IGERnDmOF&sHxw!5i98{i?itRd
z?0DDY{E+6!EJxh8_fOX>Ue>IhquobK154+^0rWU5Y)hR>6D{x4ydoL#$rd;st&Cp;
z))|#NLdz9~mr1K4AZC%QRvD&@u2H`YwOiKV*SW2~3vt5DKrMQvSQreL++baFKb!2R
z$AUr3lAN;EK<@Ctt*}_t-QjNz#J0}T<0FDDp%<PfJ&9<Ref0OB4fdUst1K6QbW4K-
z8R7>t-(GMdWofR2bE+)~ObJ<cD05@N*f5$U+IvIx<h~rnvGYZQEfO<xR-c`~so`t=
zV{4uezbT}if`~yEUi|zwk`6w*z$VXJmfm@MN)WmdvDf<|{^467(z;8T_-4=PvAl6C
zD7xjRXTPdtUlW?-hu~<9L=rW?htSewRhJsihEEo<${xyOZG)Yp@T_iL3L4FcEmR~%
z6fS4X!~dFeqV|U?w9H2y3)MP0tSKL>L~msvd);#SYfM{NWKcBM25L&5RV7Rz$#>sB
z_f=|zr=<D7k(rz*A8_m&?uj2ANiEChu!0pr&L;&-x}uP-E!s7Kt)}$s_HXwx{$jje
zc|*XU<O|5CkEo@fc|usPNARTBd243a$Pg>Q<D}|&zKj+aOu5sT;oSEo$xf=1e6n(8
zl3ix<8u^X5xJ~5oC@`t2R@>WF$&=m(d$;}aMe3_kh;XZ)Ts(@_Z4Vq>qg$KlOA#xq
z9H?k?V2mX3S+Es7_XRm_HGI-_ka8ruI;kwB1ZTDPr06wYJNJAB>3UyI--{bYmKeNV
z$dgn}VFvf81s-c#DHUw%lK9F3FE)z3Xtnf!EN_M!UK8~}WB2Gt6pA=;LM$KolQ+FS
zCDGTVE{JJko`X_bTcakJH3m^_@sDU4H1!=RWZ*+19PKwkyA*9u>gY2j3}advJfT)y
z6J6h5OpaDVXtH_fUS*fk3k9-ISkX>nR+<+cj~0$$Kc;Ze;y$KNcG<{!lP_wa6|p0B
zj}!uG(w{?kG{9_{J;BIuxx*DfMnK%BFMS!k^_A;heFX4f>b8u8u6GzXsAw>MzA~>|
z-=n!p&msDSVj6dbVyFU`X7H3=$-SP_m45Wv4>ty(jw=xf94*PImnR%~ap~--FZME_
zJmR5pg1tp!)#cu5<l_jvdx+9iZOz~VQr7NLURJVo6BLV*=_z_~OV?SIg<KIK4f^^n
zZp<>&lj+V3-9{DnbAubuj{|;9O*o6~_mwT88^?veEpwdAd-uIiSn+tfc$kdl28XW<
zLzec<nYq|o@vQEe2C@G-*iuqd&)uwNsywAM#C4V{S6CV8-|Aoa*vIIV7QJrEnUglJ
z-ySHoI#tgo8#3|B!H`i-gWzXXs({2<q4tK|`ZB3+g}SU^6_O<4sCLd-krj6?vK_&L
z&><vReaOmd;4$*X?^89=D2oXa)1eO<(aA_%D9q5xT-`zxv(S!%Lbl$kANOQe6i+A~
zR$PtfD3Ivk#SP#i3yU0=s|s>81=2wXf3sxemKlR?oYe_#GG$B8jF*~5Z5~PmlfN8J
z{5?5>$;84$78-$^RIbRfrZt}C?bV5x)d@AF41x8wPPo@5KVBImb-dVtc0#WuyH9&H
zs&$xtG6Ho<D<$V^Ps40wYc73ZvPoN{CGw2Qq>W#zqWn?RYwo=&G}8aNQimWXuMNdU
zkkbbyv32c-@Y8y?R>8(xqk#B<%mRg%eoJXnyu294)d;Jzw^{MtD?CXVpi!Hk1Z7#R
zdQZ^R5u2$WFZ}CU<VGMfbdnjTSg_no)CS?lcjF#$_w)9<O2h<)Z5pa~yWzI*sv2b~
zg^7*_qFZ-c{@c{3LBk$>@0FKi1jR&`8^f&jS?+(m7m)0@><*DjbDV?qy6yZ=P%A#{
zf+&0XkG0sV1ai`v+nl=cQgrtrvD-L5hs5~*+o*!{nF(dE$18-gVq>Yd0}Kw_z+h4O
zwD0VH34iz@$F%EK>`?zg4n9*VQ+=iRP4SoWUv$15`7hTx!?mTRO$yXpk#9hh{SE}M
zMG!4nU-SOQlfte+Iu<+1!S5|uLT&`w>r(@j<xS7ev-NpRF#R(hHi6Sfy149HXME(o
zLdAV?)LH#i%j)&H#tsm<bDP9PuS-lln+MxK#-6a}0{<LxpnrtseERzM0Wx25P>?bQ
z-QL{IjkR7d&om(|w1=3GgnfrkSsc0Y<DnSjo*f028ZZ8tFz+HqL)!l1h9rv`xsl5#
zgY`f~sTP#=!K0XEW*5bt1z0GGPuHY;rYNrR*O0$U?D6;i06-o}o`LS6f5(5mu8Xz{
z1l?6QjW<9%gWWpIsIp9KWhA_4Ji)Ejsh<x30fodjWBaiYn;#5fM97%MFwO(DW;b?1
z5x;1F_3kexM9M#W1A;1d!MLV~<Z|>m6_WNM@P+Gv?2yT;`}|&*ZwJT`m?z8CEC)1h
z9>!5$ns1#th<`u4dwXA#6bGzfP6~VeOo*f;&E>n*8%1iB8QR&giVM?Fm3WTSabE#d
zN!nt8AJy`osDF13wGKcGC9`}}Jjcj*p7nv$vpxvqLq^(g8Z^|sV?^nEc9Fm11TZDh
z?zTJtLQyQsSGS|e5N^OdQb$UTkm({`^Z;*V9g<$%a1NEcE<r<Rcpp{h?hPj3cz6as
zKl8ggSzeKbVZUyN)-`g?V=j<V3WILEcdSVG|NVUgY{}#kdk4*$W8#(;4Jf?wDEdSv
zOnTkdY9+lTTSzdDXC`<W4YHYS15VFKaIAr|u=G?Iq5EzyHcS0cAlpUWRP;0QJ7w1K
zhW53&3RvmBD`WBJ;lkPpdnYzDr+NBuitT~xg&?7TzF(l-q~#4R?g3EBEK}El3@%<^
z-VZ;PpB>*G97Ck@gidVWm$DEfO-}$>;sJEp8<CJna}u0JQxre&m{8AsXWvfL#p1OK
z{j~0kBQ*XO8pftwfUI|09gUSFHCWwl_yGq~zZV*>5*V*F7<0A){uI^6!pU#ayb0Vt
z4&7QkPCCMSd^Sx42vL4<Z+F6d1whQc*->C&t~l1n3X<USqF5geQtNElk(%hAUC~XE
zO@#;KpX&!60Llk@Vm#!ai;mxHoK#W3#t)gD+ZnC2BsA-kG-)g_*|i*`7Q)X1#?;fm
zn`4vts>~|g?FR`J0;q|8^->Jq5a1^H;D6aq0ypYsax384&0LaXIw^iy&dxgoLJHpB
z27_T_ooCxgfkR@UT{zkPY?qr#*!|28z91bZZh!%9jBQ>$$Edw6WX4Poq6>p%YD6ch
zwgOg=B5(uf0quR`JDxbhFOUjl!dwhIDZ)Q2&SP&{HopmuuNytH74xd1zrH?FB7br}
z-eoMN@ldHv_l4g3!+|s4Bauha?B=id@y<RiV)jbI4A(brk~J*2R9JYf|9qOATYG|`
zwB6XP*S1&}>|Cb7nJpL0Eh3<fdDT($>%%=+P=8Ny8SN$^Y2dezzckaUb3b6@B%ro(
z$6WW5I$QA16%+@3@&lK4?#B)nz~V7!!XSr|j<eX{=NH=4dU4T4RwO@I+t+XRjoP18
zBXar4ZuGe?_ZuN@_1K@qv|^Vbd!FhPc3ZL$aP&@(fdA@D0DFnis5NK|KwSQUS84JG
zIgYE}k*8!NrOi&QikvZzHA&`1_L<d7e}}8Kl!xwsw`~!0RTmiA+Q6P79OwW!X>VGh
z2Xv@b=8sFhB5-8mBfu#F_7L7NQh$y=I7NPMN`r(sGV#pc?BQ#is;tz}d_pL_`a;VK
zsYlcBUdt1-8iDko8r2>fv?*bi!X&Ni!jMo{mkN2tr<|z|jK&3E6w|!rEK8PMJm4w}
z&Q~OLa-LO-v(94ZH1scfdoVu-X6o;Q4Y)NN)orhUVhN@kN=&jH24ME#1c2CD^dt#~
zf~?n;gh_!Bkt>B?St+>Q+RTmAi<=CAsmp7CF+Yg^`nsnVvdnDoQSAWLB+u+5rHB(?
zy~DL7y$S1ZH+!^!351~mYJx<tQ|<TM+Sp__#0!TZ{ZP8j{TN%2U8p*O>pghv+?%S6
zC4K9*)Ie@&SsEtrH(6@oMsc?u8?Fmp-cX`gnCa~l1~X>QtyY}U#&>3y5t^;ffZOK5
zw$Tiz4AyD29qduFbaITACX$0)OJ9R!kju+F*x}d2BTY!VOAp|zI&&{3%vIuq7cH;i
z$FS=%zVCDEPFv59e-^-}L2~2~!I3IPG6>tSl_Iu-?tnRn(yAyC6%OEH<z1{r=RUH{
zyncT;pM>-X5)u-@Y{aQU*W6_?;uJpl*~un{_lHCl)Gv*;%cK4YW1Yi-@y$~0$-7-y
zy7@ed0*HU>6EAx5XM}ILf38H5kjwgS$(^(vNtSh8ng-lKCI8W3<i~jmt5k9!dn30Q
zA>%bUfghlNo6dwGs4vyfsog&=c=H{(0JV245*5>;RE79RGL>1=#$v%2daa7xUG?%P
zy#Pj2UshlqD(C}LVlI53V%ad-$lUa^s*48;*G*T-Q%&;26F$ahMTge=BwXyf+I-t*
zL|{ZK=?%M*Wc@4nP-(zL_yTI;(`)pMih*aYK6%cH@{Tw|nOS7p%dbPFAR+qgF<lm5
z(A?wL^$<%#+KWSU7ZvI15x#u&yt1#-eK$zr{+F%$99c0(`3*|mT>)S?(Z2Jj43*-v
z#St)XsvkvYBoSH?tkmf3Ql@HNR2)?xc?S1=p9J9iJ1*|AmVMiRVmwYi8DEeaiX5Fm
zZF&Y1>bFjx+w*$?mfSu?9lr;ssHffqtz5&m<{-{#7LP%<&#MdhHH4D4_!lK{Ir6l@
z46^AjcU~6wz8W*)AFIk8cD-d%C8uJp!5*ma(qwW&oX<wo9|yC#(c*$s-$3xqQ<nKG
zB38)S)i>D%(fL@tl0o((;^=`*2-C8`N3AL<xb?ZF^7TAP)AY)%&9#mayWs+yDoZMv
zI!@gCJoto;u|01HL&6TQabR9`@jCuop6cd->%<e343w`)Xr4TW`_B)IMrGX6ib+HW
zc9z#d0WS5dUvJfo8+)OVHsHQ&Se?^jO366W0tzQ6pz(aVzE<`J)YXKX$_nqO79*iu
zzhLoN9O5gT>{b^;$O<t*lw!G<NM!wA*}s7{t7Bm2qh=gtpnl80HIjrM2{0_QzM`PP
z4W*N2!)EM@8#x}r7@{ViRLd11iUEY<m+eywT}C4tv0?P5G_YJ4fTWL<1DLP0B-tW{
zYDa6T<V8@Vgxpd$sf^iy$EFDQa5ZKPt0-1IZ;tb<$0Jlzf@(SdPal;HLKk_bx+tbk
zM5Nj%U`a<HjcF`RD#JkjQNM!Ri!xCPm8aDLfrt_?ddF<i=P2U<?`b=Q7`W*4r6AZ(
zGHhDoR@4FZ{VJau0BMg}s=n$)P)9^3xgn?$U5VUraBgF4YO)3^hmHT)q8|YW!UbC}
zDytuz6;Dx}y)H<*6IeIW!3HGrpG_x{K&h&QF7QcW)nt^nl=35ld{rm+8EK#rsQ7AK
z%azTYewOZGIK{P$uv>nQ{n62&IWzs?Ve2z*ByWZnn|c@*CM@`(fD)194hTW{1%az!
zY*!cNa@xWjQBh+Vn#nJ?PblJbCl#35gN}<o_+>w@EkUb7OOtUmf)7J7&#A6GE#`V1
z8H%K}(8|#KiI*LU_^2L=G{IM1+lT|@qZWUmdW5X37L=yv3(w$)e2PQl*&v=~gs|MA
zZvjA(z}GGoD`*}R{3HVwPra?vfZI;mK&K?T!&V@EK~W{`q2i6bXBCcQrC&lNP@p-p
zv+Jj16|faU=J&RznL4xz{o>NN*-x<WyT|`T9SwCBl$5lJG?f;aQ9*vJ#nzq8jB47%
zz>xr}>IolF1*rq~@v1?78z?!1+#j~mMtT)+6vrMaM$}2-(!kTvGpYvEw5+qcAs*59
zCn+S=C`3Uy>f5a={jta}K0SZH#xw|FZ8?Z%v}^&I7xhe7Nx&}yKKyw@YU18dWEc_*
z%6@vIXyn5*{|1Fw9zLNF@Q9;`7U{YV)<~X=Y{zprV{Rl5m$KnNwLCuPqK=f6?4-qa
z&2yJS9_ke*NZ^)yol_6t!?{cQ`kxZ&nXKh@Qo0*15bP#A`!m>$a7rn)YT-H|y*Gp*
zmrmiJmtvML+nHpA)41I;)mxjJxA<i3xZE?1t34VOnfiNQNbO-ZOiDi@I}Lu69~f&@
zbcpF)-{B~2M$HIK{1R*ZYI3S_qJYo$q-?XoB}G1NA9-0fc9x5tO)8<HC-m`RlPIjD
z(Ow#v4Bq4kmcW-#aa7#jA<u?C8Z_dc@}w1~Y043Tte;n9_>gqsJngXz4Y~puJjv-X
zT83>ejTjv8^uppcq$sUaeRrT@<-ljVxhse_h_MDe{cBvhvKj1}B&Q_f_X-CiGNK=>
zYK2s#C1zeLNMQS^jf%HA$6l<7mhV(JN~{rRmqV9D$3CB`xtM*Gmapvzp+3T3cJ?Z9
zBuI#DC!d5M`}jvv))3@XcfX_+%BJ37A_~z@5gY_f5qZ2WEhTa{O2zUzEb$c*FL^9@
ztge8pt+A%aGniz&t(JPoDg#af#n<WD{<oB`{nF-E$?!?myDX)V5fQ}M*)nN$LT&Pf
z)K60^In)K<#?tY|vM<Onh7ku+!s9Nc#-@ZJLS9j=^4;}kP)`}I)w;@I;rqcA(;9_t
z@gB_F3`q$}c`l=t;ldfry>cdo!zFp>i`CmnoiH;iq`I<@4WT&6)seiUM*A9<Udwr;
zv=}uHz9aHePDP>RVZY}PTR2DQCqDLHy7Y|YfU3Mcz+jA)s1N8vr<gWgZBTnER*rD!
zB1)sz-Lh5OaI4BFq3{>wU%jc;eT|WrtP2p?A9pP+Wh60=19pz~-}sTj0sMc$0ZVim
z>n=@dT}zP+pQLCfY<y&fqV~iBhwsVukWfv^)(Eqv*inu0R$J5tUav4?D>ycYNynI0
z6<o9mgN+nzRx3vy-ikouyeH^33}~+6gs^frJrS$0qqH)aS$TfOSAWcZHIl7NcM3_m
zM>ZMRcXzIW&XZYYh>MmZ_s6|5W$!6OIpQh89c5iV&t+|Ef>St-En$B$#2FfKr_D&j
zvb9F(<t?+xrI%9$YJN3HIIG<_|2@(YsIFfcn<`0b11Onq=PlZBJpb2s6HZYrdyQ}u
z?!HAQuzvyl%JrO|g*rfB$@Gh%Q>k2MaC;*=G;4H2B}=XL>_5z>I2Cp+!$M;<N;MdL
zLYt+6r$>Hn5f=3mnj9*V*Zq3bb0-@vVxOg^pO>9PD3$vy`4jM5xqZeEABZ`~J>JeJ
zz8aGz@Z&!x@Ccenk%6{DV7yZ;n3zlrW0H#PdiZ3MC!YTT`@UZb#eRQ)4U7!@XVK|N
z8!KDtYXWFa4SaH}BFec@2EBKF19G-35{%}5z)7d*42DPP-1=H5@|&O;`Ocf8Nqfkx
z|Lh^1X!#oqFG(T*#y_sh0u4FPZ=dUBDh6iXhzBzKxY*cMBK>iVn0GbZUHa|3VdS6W
zt`cuOGAVIq!Mqz)`o06MML;I>zU@C*!~;l>y&9$2NNkVpZ0;B!_>d^8+rX6_iPSRH
zEN5W|JPTVnFEtS9`+D-@f*^2Q>M8V9?@AL#=tUmw%-s-3v)>O>@1!WZQdC=Pgs4<V
zRFlnN(vQ>UuWoM!29Wv>mF3guF`_x8<J7-uND=9O?YpoM0J-pmcTsI|KrQ;vbYthS
z5u}vv(x0sX9F>duP11C5anjbMf5TmD9(VW<e7~5{|I!31NR^ke1RFpC0k20FI8szq
z?F5hQ?A4TzlV3Uf5?Xs*VhFsny5S(x6vjDfqHp*T<oZG74Y3Vu)FSx&Pwx9RhsCLe
z7cM^|aDV3>Ti$uN$AHw)9o1budnP1^aJh8m<<d;2F)qbvm}yyW*tR&6x@BtrLN4$x
z-Z#&7_YM~S3YN%H%ek}E&Y0L8LgU=)I@LKZVZ7RD++jb6%^Jt)^qk2KT99xf{*<@*
z$`IYP_+DN=oXL4fqI_*he|nNTxZ98}feUjQ7$IRyt}^<fzh1MUD#w~S&AVRk3KZrB
zE9-xY-D>ivqfD%}jhk`eTim3%y1)BZu5))&@@%nOU@@LLe8gX1!ZnuZ-aVz>!HqOb
zzM<{HuX^pf?*n=J%!euptE^WIEQS;Azk0Wk;;`orFjT{WoRnUOxh}j0C(E>WSNfD@
z+41;0cdM2f?B{#!r);x_WBP5=442cX=ju+MFc&m->s~c-xau?OKL>7VT;C5nY{N7)
zanP~@YbSl%acr2Q>lX*94i%c}3l-11F6Z|z^>l7IG{sJJWn^Y+zbsUU*VIkUz)WF$
z8IjS8S9+@wm$MI@5N;r56>r+RPu|AOuAaOeJP^H>Xx#hVMBtw4l^Nyb#QgQouGsc^
z;@t{gteg5E2$8n*ZRuv~P3dK0rXpNbbkXwf%<99~J(}@7kSu1;yj}ZcTxiwy7U?{r
zeukr-QHeP1`Qv3rQoFjaTZg_8t(F~YR?=C!folHM4^`{y`3ICr9P4C=DN24*2Ye;L
z3~B}<%h$90<@z~BmD(Yq>LJM03X75tQBDhrX|4sU29@(sXO{ZfG-g`H2VTQ5WJ=&E
ze7G<V|IRA2i)<7CsR;jPR6I&7O!)9iwQt=n=6ZAvShxv^z1jK|$(Y*v@umopdmcMe
z(s-w1ab3x#zievdS)_!=R+D(bT2<xpEMITC;){GYgInz#S+mJE5g64e*M!B%?sgBO
zAK`{>^Gw>AOJ%%#<6drZJ;O&NCX(+(oWKyg+)6Ft-p_1Y{@$a)_MO*vY|?!c(=q+x
z>dd3%fx_PShbs;B{)S6ky=C!kUfEg>w&j*zcL^jp4Z2CJt%nCsN4@Rr@?Me9n?IVZ
z{BAtJ!L>V@AFU9|clPjI)3${Bgbt&$C`MU{l0w7rq(_9HsrzM2?j~srV=Jq?0;?2B
zLFNgj$xn_HuAO$PO%t%N)iHRGFw-_I={Egx|GN7L>t*-Y9{rqZtF}bfKH0Nc!1{1~
zCv5rUOPO(3op9eR4XTu8uct*tROOhW0Ud+!(yAYy!i9+)FPEm0WILwc9CgwwJ`EHu
zt?AM6G|{o-jWXu}cLCAG>G|nBbzF7>qO*DB{mz{;n*u+|JiEST&fvxq?4}Irst*=j
zNy2WdjMGkakJ5`ye_Rs&R#m<s!R#<+=```pw$Yssf*fTX@+L!^^H{%6%1pM_S7wR_
zNxdJ6RB`IU%Q#ZaXNeduzgtUqnzlMo*D8K8@{W+%h^_EOeZA_7&V0-i0W-DU&Ohx|
zxQHwdUo4-M%dEU(xHcR-IOCnKR=D}-scutYC--80dV$?s>7opABwXBd8z=UIS(H|?
zEWw$X6F^v5Zwt<z&z0AUGgWnz#`*M5868AW?HhE84(Hmy)*Ks0sNLn{KG7A}X>dp0
z>C!+Da+O>F6UD#L=>MYoZnfpIs;iX`1@^{I6f@DI=jYPbi1C@H->dWURf%77BsQNt
zvRNlE+6)vAc6FznvNdIP?@@|K))&ra>t!}yJALnU;7P&O>iC0Ii~Jg^k+%X{OUmNE
zbO;aT<<69T-`qSAXg9tmxMXv@TUPbObbnT9a7fI(EwNO_1pl0pjweS-J2qFR)I>Mp
z=QUEL4BM8aIdR1MyIW^N+-H!Sc&B@`qw<~Oc2AitP(BlV*=bD=7H_-V@4FuNW5j=H
zXd!Jnqmlo|h0!-r*|vRR$u__@u@_Lks+7{adq4A7TaKLAujd<!<Hk;_Kjpi1#RQ60
zPukkN$390^X)s#%tW!(WQl>fOW*2og%N?$65$?Bfn7SujH#)X~1s?)q%PNO0k-E={
zp6%5=^;wyBE1HV>Jvb!>p775pPTH+1ZAHkR&Z|rCeBYRy2SrYtTMZW!PE**Fjz$SP
z&l*VBgFUA$7`{6J6|48*?mbzP+>E_HHW^25YHvxTAbyMm7V-EJVz!2l2s;sMm%goi
zcRBOQq7jA)d&E-%_fl-%3g23z>?xji@`+yK#yx|I3t}^~PgA>A7l~`1mxTAvj6Pm$
zE3)_|z%AkL)MNV@T?eJ3DXQX=g2KNyxfdm{+4{L{IpnQ2C@|GO(iJooY;RXzo10gg
zAtWCuK7T~Qa(U4GsrzzMr&If(;?=>)4>#v(_S=4a9co0pa@i$}alf(q&(cD%C4^|Q
zTTaB(AtLeMu?v0T4m85wE7yvGFNQw9JfIkm*x%@v<h?L77ZQGM^?R5&Z+1agu->Gj
zqjkCGoMZXi&}k#$^t2IDRcwqR?(5@_*4hP~(L2~6eO2O>3mZcE$yQn#3o2Vm&BcnJ
zKfTUt&GC-LzKCjOAIDl^h_CLB8}${-5XlRlY6h%CIGx>~YFlTnUPhbGQ3=`>wrelb
zNQG$yi?5X#I(g_MdPH(zQpJ%iX4Ugi(VA8$#<g$dcijmQn>;xdFMK2fUY~b9TpTQZ
zf3TAi74LjaLw?aE$fhbS`jQ_}J$ytr*-A)szK^PsaQuEFwrHcde8nP@?Qqxo`R<SP
zrFlIeqIEs_!}2}MRAZwf-M6VJM#3bumScWArF$Wz58jl9h~C7GA8+W`&R_;ShT9qL
zk<$d+b{8J7o4>*ALib>wu}4d3RbpOBGlDZ!!~LjYQuNaCcph3ooi)fE87ymen7=H*
zPk(C8>PQwDirf?bBYg(#UAE47OVc40(`Gmw)pDeZE?D^<=2Qoli#i4Coz#jejr+=_
z>%#=E%JrEFh^-a$e@(hi-=EACs;Lqt6UvVoZZ!_kFTUKfC8R4rU0uFKxSp9B%u_nL
znrYMeXgT}Khi7h_J%OD@-*7=mE(_j*)359{%OET5J>2j1lolU;UE%&xmPmHpan9aw
z!GW_HL}mBso8?tow~M4U4!I}t@E8{M#8$EVO!ARf<2oVBz<o$rWy&(|ee-6{;PN2m
zX+P4;d#g0b`*63ln?QYpNhBXeZqPM6+b~mG(QER?OFxz}=MjpyUvG{H+>A)YIr>8e
z_*i9pI7sh~Dv$748b`$~FJ5{x?S}sB<$&TQgQlRo6QRhLTT>0YTv)GBgd#KW$qG`1
zvQ&&cUo+Qw)|so8-<4;5uARlI-6I=6z}%m{^tCd1W85K+5PI&;pos)C-eD?a3hi+;
zQm8rgkjUq@*%**-c#`0?9Rka$L9n?HEct5}4;zMlVC!e!k?Fk5{cFA+t3RJQ)&8`7
zFyh-9sd?oR*FC=aWIaJzCSRX0_6z)uA4106*9xOgY8e(N*tZ#bB*q}S6`yWJ$^>u)
zDyCU*ho$xIjqa&hW4@Jqx4auCQaM7!pJ}=NS$yS7kxfN*<{`U`Xe1J2{X=WRW~d<A
zFr%$uDO^e8Qfm8db~NYDg-l1BYA`=%d3v(ShNYGjtt_Ln#QV<EXfIPmtQ$vJ+0l~(
zL0KXY@_ZsPJJWBn-6^XYDadANRTQ{+_0?>#Hz8(ogf8f<o4A+~Z1ZMo3M@h!7{d$M
zFO}XtzhW0Zym%jvmUmFU7<B(`kqeur=a!Od&ryBbVU5m(a+Xo)>Z`=ZTi?&H$wWW2
zB0A3Yb$e_q>TJc>*B&<~ky5qM=PqXmVT{}-U5p<a;loYk`;ErThdr8(Ea7vlL`pQX
ze5e>B6tG`M#M)15tY)S((}kZ#t>`YiTnpfz5iCjCyO_Ts14U(u;Z-{)An6nU3&>j7
z*~*baUm$c{6S`aW9XvjkY$JEL{H<a{@Qk07NcWeiSB`0#Dn&G=%y}f9?dr1lpnRUe
zm*tF6U?pupv2qW$t-kBa7p~{)RpyfP(@fpJ4Sl_IhhgT5$yWac?#PwRtZa?Uq8uHz
zn55z6lcgUM_SXgE$=42!`yWmGKE@67sHC(4(NBz$cZdfLSvLolk=2qoK`BA3k0Ml^
z>K{rqs;Ehb;#K@d$$j}JoZns+ugp|hrm7&c7=KZN-_@?RQ5qEoV<*_3x8_9X74-*|
z|C&fPPgD6h-;M+>n1_QvWSE9d|GB<2>B(E?+PU=>8l{bw<-+8)Dwj(Q4UnBy;zOKv
z1HSUH)*jZ}qwg4OwH<@et@}y|C4nOS2l3%8)rx)eI%EJ$)L-DdepeG0B%b&gS^6mW
zqlPUOb{5XS^3K#!y1fFJL|e~N#|!SYHu)Pt9pl6^Mv}~KDD$oED>fg6HXY>9=2fDH
z_}b0YZJo<*SPpZcE*JyGB`_C2wL|-Y<%#8w<;f4J?q%qE0C#e-tB*IQx*LZQ`iYho
zI;|-vuj1poD2h!A1EC_mYjdMxQmVfWGLP<+H=#nOAZ~kqWZp2MxquQZ*{FJ|#9uvA
z>$OSX5K?4mkGS^MgaIYK@T_QdB#UkRLFCdxy}W~a=uL(_r^UY63D}~-k#CJFgDpI#
z?I+ccSZiYPk?xOn!DmIxu?d*|(aM>iHO_8J)#nx)uJyEO!~K+cOnsDkQeJ4gHf~*1
zT-l4yyCjSm#I;5~Z_Y~i#kUwYili663|2*pvBG9&X%=lH^wIwm&Ud5YhS$~#%}kJ;
zP~hysP)3nxy_{!uXT@^g_lIH9^$rHM2?OH^lRiPQT}0PP0uEdnpAordULrPl7Ca4)
zvmDvUoUVW4n!mx*5{eWpZ_8=sJ3~)lj?+Zlh+gVWw~)oYPJ6d(g*<ajwuQm#@>sLJ
z3O>dP>8qA#9fmBl%8r}MS6Ge|Ry|$x>XyR)V1M^RVKu|iZwDnj*7}9J?y(u?%c-E#
zTo+Hgdj2z}=nJKP+0@;T@I?#ic8}{{RnzdrEpoi^T?<o5#?xmx<CgC9^ljQ%qLB=8
zAAA$<PtJsuMhmOAf7fkdkk?WlcGSPO+FoOs({;C+KQC7GqT*1xB2ht-J84Wb+N0?0
zzC~O1Xj*NfH7JhLbs5!qu&Vt9(N%?^QJX3iFW3+JYD!2gkL>{R)p~DERRN|?eNL(}
zajuqsYMF%n3=c@NQ}4xx^OuuCpXNo~Yvrm;0ts|+;!1>$EF*WHn`rl=k;}*9XASaQ
zn9!VY7bmH9=mdTM4`@C<+G!@kauUti{>F<i>DW=tiI>4mj^ZFF{$-J5Th!Z<DlXh2
zY+K^9STW*vE%lfJ_GzNWhm7pfbtO=#x^tq`#BTf=2c3PI0yboKn>&J%K8)atcymhm
z06u)r$Af<|LQo{~LckZw#~}Q2sw8N=J-18UdcJC-*2v|j?%G(Rzym!^NQD{#Tk9PU
zq3*igdbg)z={cSIh%(T~9ufC)v-2H3gr|FIc<T@Y04)DA#0hR8op@=FOc3PD_WJK0
z+XKP^&k}ix46mX1u<!5=Nu`#=EJz-%b%aQ-mHh)|r}(li6h63G?)Set_VPHW#9|*D
z4>W1#e<25;W<ugH{P<2IxSbu>M8bQ#5<U46a|Xj&aD1;gLyD@`U6!`bP{_&2W^=ic
z5+AP7cW{TvB?(ND!@*Z4pjBS7@yBWk+TYsY0_+YUmGtpl1fB@zYA_%h|9^$M{@-o#
z|8C}ORPn!?nPLaLl|-A5#h1Pee!uI3)Ykzi#@^Xp5&obLc_@lYe@yv(|2E|t%l-)e
zSdPM6ZW)yOH*VN%!#|K%YAVvV+y$oDf1T2K{D0ui&-RiSX2mrB#skx3{3Bbg@g^+{
zz0dw_#mxG>;h2#095!tKoqqSb(lD2W1%K6F+$%VtOmsE;);8sAH$@F;A$QVm@ZOGT
zTiEZdj7In7-EPc+^S%}=LU4!3wu@{p)GTYTocwTc2aG4-=<oUS_U#-IB$0}mBe6G5
z{|2JHSm6S0Zxu7&9bkGS0Pgk|+8v3_^CwG+`Xfn7Xz6+@-3}B!N&?xR?J&l6c}<gu
zaKY?iJMcc~Xzo9imY1Q7v;Mf%zMW&vhj2!+6BI%TwHS29bZ!dg&cimGfsfPd<h8rV
zx=9~Dt>n7%aX2{;{7;RZJrtjZ3la{J?|fq@^2(l_*OFv$gTc^ks;IW}mk8j$zGvGx
z5!*$cCl4PNi(2^e;}E6(r();7Gz%-{{=S_g<DujdoGRqig(d9#h3hHg;PQVmq9|?!
zL#w*+O;YZ#aR1(_$Dvi5uL)A`w5km8$HD(`n_fFi+NmgZF8;5b^7mF1gjS7zX~weC
zs&Ml6moE8do4z<jid1NJF8;5b0!JTzX;pJ*)yr3#BzIbs8Xw**_yej@pKs<v>77}J
z)X~}1Z%>(RaFN=1K<E;os%+W2^W@zy`0Dq`uI>yFma}9xQCP9%zBaQ@&((=n20Yem
zdx0!<wjCp6MR-jrLICbOvKQ)6+w_!#`CT^_J8iQIJMzHhFz?fBkoc;FH6cBeL=bWM
z_3deIa<I77OmA^9oSJ3d8w9I1D>nI{f~$uw*Q6@PgmqjLdP*unWOD9Px8DFlBovz#
z&YO}-Vo7vgy+m<$n~p5qB4S2nYP$gA&WNVwht4W_Fu6mh_97=ebiY$?lla!UgDBL(
zX<Yc;ouAg8V?5|RMJjIf?1$5W%Jk=#?2>{X$A~(=(#qBo;#}j1VT*Ajo$ewBNY%T*
zaVd6(7t+o#60-)HrV9998|O?gQQ}`fwM`9?fH#FAi#!rbi^O0m=s1ICQ>6cNRXIp>
z_L3VIQINQD!j2Qy)}X4T@&OTwE-{m>?Aacre==zAL8SNBgT0Y!#H^}S;?j`6KvcZ}
zTHf1a>{CpmVQ2AO<MJUt-s#fugnT&37vNlr3L=EogqD<k2Z_6QpeeCp5T@QFQrPvz
zsIwaqW;~c0xfXGt$V6kI#6uYTlk}kz+!KL<R2ggr{EHkLK-(HA+4k+s<`+OKWlA`_
zGtTVgSf(MOSHMCItD98P)*x`b>|?lMZ$sSzkdiH!`}5mWMRlQ|M`@Nsez#qk?sobt
zwj8m5X%gz}l4o`wX^ZbEupCZ)gs{;Aw~q^vL}=9lDDUh_lT;idN00a3tu7?7522QN
zX#C{G`eU-=Hb7z32C^Rrv0E6KV=+<vu2b1g4cuwJTwXyL2m6L60I(geZ(b$wc+~bJ
z?~H*EINimr&zU>^etb!F8VY?PSHlR<7q5Ha$)BR5320g3I`FlOCb+VpMA#z_)1jNz
z=YAPnX{Mn*^0mjtYB$h4XGZ;4&Alg%Q$~@>QpI8LgjFG(rY%VPOSR<_f@T4XT>7?!
zk@y_bU@<43^Fqw^r|Kg}JXZi^Pd6<?DN{axBCBOrq=;I}v;DTv1V%@-5F04gOn5Qj
z`#)}wxUe=bdUy%VU?l5VF#@Hv`GLpICmG27S>O!i0wKt6b2#Un?x3Z3d={g5|B{w<
ze_5dzuoI#1u5$)h#G8qX0!B7(-7^@H9h2u`iM{EYLBzQ}Ank72QyyWU(<W~Mj__9v
z6923$tzsb)sJuP$TkB9^&h9N6CXbe%Bh`Y4pPhPc8kEbjh}!$IFvt(Z)c%b!c2-Lw
z&bFxg0GHE%i#f^lHac8u^ykoj&P+)a3m~il<n-VB`(Cft06A)r%zPN+qEaZxL91}U
zbIYAn?5^}m-=Rs!wvNepLH#_ksJtST;uw~nT^(_g{vv~^c8dSl%S}>|3NdoT525~r
z3<~|2v12&Lbf7$bR6?}VwH6}3@zUWPT4WhlFeH+V(KWmTu7F)w_9XYH4HVLk-u%m>
zDZ`^(xbbK4$k#1OXh0qfjWpJ^6t3g4brmpXjXcPqsNfEX2pbo)N2fp;sxfIyWmgnF
zrjrT{y_<k8T721IEF}W@Xy2i^G<6^lI~RR8s^0}|@jaO7vkZ}Iln9II(s23_rbG{@
zgL`hZhh4Zn1J;ht#EL)52wAG3J6A?OPyAk$L5PAHzTfez?0WjOJQ>s%`pe0V{IZRR
z#v=^c9ic=hD)Hz+%h%-xGl>e{UX85z0t1%F7NU8?{M}<pqs{axH4Z=z6&!GGlgBXG
z^Q3B#t4G8NZUr{Qi7ay(1JM(go?v)%8{c`x3JWB?v(M%2*<ykM26eK=OZwYk8Qj?l
zoQjc7DK4@hN?+280{nO~CG%?hlAP)B$>GV(mxd1-$I1JI2vq!#7BF-Si}0hm3zhRQ
z_#D*RD0TgM6z>p~>=ii8xSLRC&u^f%_`C#7YYz%Ict1-3p}=Rc!$G3Wq>5Nk@Ws{O
z*YJs=!v*oTKKI*HKhe3hxhx@&LV#H(Mml%H4;(qPg9WU^De6eHMDordnV;lyG-t?3
z-0nlHs*c#!WV7aYUG62n+LqiAU?LhRO3+>c>$7FqKt{gnH&Df4_tZtfZ+)Fx3z%d#
zeF<=kqn`z<u{Kze#fjV<I4~co9GayV5Y?hS1pCa&0r!a#$H1IaA=qBp<lDI1t@|;Z
zi$czbD6~*hCKT86t{ZVWV($<6!UckH;_hX};i?}G_zCJ9U=i6V__$^XYY>VpWt)+@
zi@cScI+dX4e0b6zjjg%SDx*I+xf1iZj8)I^L_`orw1VQD!3Xn!YC0JpE78F3xi%xB
zF`|~?hIvE;#MhjVu0E+OHY&tplT{|l^VlE;Hre8p6XF?EDHNywte;u|?4e%3Q*OIR
zswUxKiX5r>9~q8Q9r~U`ChaYU*Sakv`+>`ln^7fH+^?eL{1{TMGD&4phi5D+ybO)I
zkLyjW37^%0`mIlYmNt$gLVUF4_U7Pr0F}%B;@M$e3f!i`1lNYV{7q*7i&sI@faJ>#
zM<qJ!i+UzhNnuo?BuaJ5(A6gg@%Pj~glvSQjllch?$Du(<BlH~bbb{G;G$c0qOy+?
zFfuonA0FS1txQy201ev-N_E#XUOElBJ6CsrFh;x5fp$Cf&>MQ)<n-*xNJ(JWH=g7P
z6gZ;;uGJMyUOOm1_4Nc&eFWLgAi73L-hw=l0*ri*tQTg7)R8Zf(j0acR1;D)!TcdJ
zm+dLZMSW>BLSvf57Un#UZ+~Uj%iNSrR`r*1J1>%0&4uCG--!x@xtb3pO$t=+om59M
z?D~k?3AiL#AU`=A{AWyc`OKF>YaL@QT^~2CODizgo9aOeWhW#i-!6^WPJecJJ)=<w
z=s1JoG>LofxYJBr3NW&~`-C|T{r(u2GMQgTi=wK`B>mXtdF!25N+MBMvNvuB|M4i-
z5ailOGaRcfccU`m_4MZLT{}DdE;3zG+<pRu+>YB*)ZQpzv2E1}yrz;jQjeoK_pN=T
z-M%>uu!MkubB^JUyKtR`CmHIV3X5dL_}y#My|BIT8^E9Mt+a}6|C1fMl|~$}Io3YM
zsBM`Z3Ml8|G^kqGhPxn3Ik+t{v)vYwQ4kH~9gLXWzVjqC3yHFFzI-2^F8|Q&%iA5b
z4lJJ$&m+%w|8Xz+Go)MH0gp5a5iI-mRD1`Mf-+j5a{Hg8nl}1xkGu%n?^Mj|x4-5e
zp%;YzZ%B*u70R7{yalIroQ;x`hkp;Jki)xKF|9A@^Q}XW=X<Y3?fL!Ge;>h;G=JPG
zmF>S9hcu)AyKzW~)c=_y$IB!F+1uXT9JQHG7>L_4qM0qxzkp7=w2al-o>tO8>!hUN
zH{AXwiVb$V?()rnr6D_ko-`_n!X`Nz?F&9@dV}VH9hSFdm@V%zJ8yOc#otd+S+xjb
z%3YyxB08n%Z4eh{eyW`=?ltQ-Gd{bTF;eBX6kRb^u0LQ~XX_rHo!82<sUvZAv%cAV
z%%I%7SxnbO(&*CC-niwpE7l_Te)|nuLpSTVHp`&G*e?wno)<3jnrdaxrKsY5*a`k#
zkam&r!o>WcxxLNINwO&81&LqOof?YLere4(4|*)U(-HW!wfXhi5xrzRRid)hUFP^D
zFU<XvNA9H)O5yan6^^-L^B)9@zCT&(G2U9gJ=LFQL>cL~u>5uZ5q^1xb(-O9m1N<i
zzUa^0&o7&1HDqW_S71Nqt61N$Di7}NABbNz!%bE@_9-pBXO_6H7};f&W%Pb^w!QO7
zrPwF&m6dQ*u*T$JQYoCm{+%U=500(h(`_GFeB9%84%hs7eBCDBGv3y%eXvokQnW!8
zYj1@|rzv7NI^2Q>wC`RQTiaazK2u@Xod0(1$kIDb>Wd7M0_sHa&<jVJ+&}iHTIoyc
zhDtaWm9*`vKHnwx>BP<GSr<IEsNb@vgJ|6?FX4G>{EB-}(=W3phc4fEHdC~D$LNA>
z`E1eBoQOd0ehK#*bvBrQ?^9#NcZ#fyD?1RFqU+{Van<*$f`wxjH~6Z@2BSCc1Q)o}
zl&MUa#bp)x8jqJtR9Wx0dGK%}Z-nvWIn#%M31U6V7{j)ytp2DO8}?;S!7A*{G&QmQ
zqZ|KMdtVw(b@cs<C__a9DxFA0=2WO-jD!aBm@z{#mN{f9N|GT{5h6l{<Cv#|Qpp?+
zk||L*#>}(3zRpqofB*a3`{KU2&wXw$^qh0P!`^GJz1G_6v-W2fpDb0{uF6>|HnJ59
zlFJaL82%0%Z<@n&0cY2+jND52>$>@p`V`%^k~GaU)uEvilI}N74?aD*9v#Ye>3TH>
z$|feXYw?>1<2VT5aL3}g(!w@e9ZjMsp3~MLkxP8^H5IGt<KxA-^=o~<3U5zL+Vo`S
z<L1_M3ygkK2$<j<V+k{7^-2>mo!2Z42=Yd_-#skz4VXUd^oN<&-8%-x2nV&gGSqJB
z#60TydeiFm%?HuDa$GAKa%mhmpMTPyUf7%N;D9Q<H(8h2WZ+@F%-m{&BwcQ@2+fRK
zH-6p*rSsg1=&f8~B{&~Sl5@JpKz)TE<}iDvy~x?5%joO&c888Q15-*B5!ls4L8Df9
zU7yAI%PFg_)XZxu_o7i}C)k2dw5~heE{rYd{PIgC^W!3u@LX#em!Oi3WZV8?wY7(Y
z^@YnUY9&#=wFgCC_ER-17iLVp_K%#qu6eqUU(^xXzhhS+-{jX8LxCE#flr6Zg9RTw
z#WL->{*|2maQ1zCr}>z~d`pLb>*HfDa@(I;b&dt)+T?3!=Jrf<HuOpU2-)NOg=2^7
zjqv~I)jt`*&vZVgk{TZh3FkF6F4i+2*UWpO`WSy?ZBa#ZHK9ta9S?j`C{!|+hGAyO
zE-&{YB`?*wyb7}68!@Px!TBEbS<!Y8`qS@E)Y(PeeK9?<ztptp%2<k|XAZ7<V%)1j
z4ktbqF<|(6L@86`woMEMS8OA0Z?#dHJR~sZG3Sa6rc#j#I9ak5(ls8O?Df@WVE1gt
z+_%gqr4+RF`y0Z=<AXohoiq<E3WQ0~ra6BRG51hY{czsy*<J~qYww^$`e&cecG5!k
z!wa$zExTU*b|O?2dV=90o~FhBxVZW`?M$^WMTr1ei%XX?FY|}5cD`@AceZh;;+`$1
zxMz#K-;*;V0<ca}9e%=Z_*HkNHz#(ox7bub(Z>F5PxFu60=nUZe3>4U^?<I(d<DH{
z3iZniEA%;Y9eNeV?*ouu$+InE@`{I62wFoc-BPqCKWtJaR5;+=w~h5REq}bi38!Ci
z!W|exs-(8?+T^_aUV+)hiJ1rZ8h$0WEk*O5<=M*d!md!?qfQcH<1!WDd=p=dseD?-
zM{L_2tKRUyHl#Ead~IdN_T{kb`@+Ag9t*Dxk1$RMEX}5WALK>FO3dT1?MCC<+s&IK
z=6i1U=~Q*GxQ!$w))baGJ&`ofA2H^B-feVo8>@JD7CzK}WI<JO#k^Cp-K67)@OZ9;
z^G_bFHz%AXEM23<D}Jv_P{`-I^Y;ze>lT?N2$kpBIkWCcQgn80VcD|ae{-nNYExYT
zCFivhDd-XHN!HT!;b0yu6;s()d0t64kM1QCFUlgq#K|@8;<=Z-tyz5=%Z#ciwWCVL
z9M};zMrjHACmWKsdYsy^=aw$Iua$OOtB7yE5j|e;u42wQ{8BPLU2yU#mPYRC6cGM2
zT<<o)&UqzQY$&bpb#)b0RWC)O3k!>S=DY4l>Ml2_>N@?@@DLn~+;@H1<lB&A^uXxM
zup_VRdItKIhGY=?eMiFv7OU&AmQL1}OS#q$3CRk?o?4<bbmvTmA`Fd}j$3dinb6+^
zw(HRlo3yYfS#=}j8kS@qu7;6xS;NdbDW+--+(NkFnmmiZ?LsVHMJ3C^RG6a;BVa$6
z<!-axaLU*%!=kN}woeO<vf87RZKU{h2gOGYr)lqVZZdE#SRrRUDG90k9im>Oww`tK
zI@&NOM?JNWshWF<piayBXK|YiIfUIxq(u#sLWH1Qz2hd2hN72x$v<r^KK~0Y@`?eL
zOSCtla>V446eE3FQ2&#GJOY<YlZp31lcsUo0_lk{wLc1C`mus=s?B`G7N3NfYY_0`
zZc7&3TJHSKd;5q1=rKs{R{jV|Yu$GoAa;d~K@?UcUv0SPHkUa*(g?ERt6w{PI25h3
z{j$rJmxX=%Tfl)UlK<W|dyoDxzGh+fDG6ee*_@%f*e+EjA`;;T@Vy#!Y533c?EfxT
z5Zq-8BbY`I{vE?hN1)hl5@mFc*kq|O=&o^LGA~i|w18fxFgSD7!8sqM2T%5F<ud<4
zXa*60oBqp;_;FRvKGDiWl6&|Hn9c{=D>_7YVn4#TaCA&Ha>yTFb6UA%Yo7mWdbhzP
zM_vuNL&-_Gi}!C57=?vp;Nzai93m2N62a`Hzb5lG?MVwWd*!%;v@S!67q>VqcEBZK
zQcVjX^0D_O$p%QFpD`dc`%3xDE^M|>nHt39mwiPi4p6+m3#iR)=v#@x|3d}($WNf!
zss;FC%l@BNrs<7ur3*#rZ#;qr`YvAPdgd6GDZ@4PQq_DY+De`?-{H92`4b8XCH|J9
zM_R7Q`=Q>`+^`BudYac`S>3Lg@<i7$;<89v>v&+FUVE#Wc+^~mP<(#7-pm;{SJ|eQ
zCPBZCw6d0eTD&G5tBxUSk|QIhWRPMTprm#>ZL9Etfr6UNTZ#cb-Os)(5ge*qZLVA*
zvv~~}xiqR5xfnjFvPq?eQOv4KK2zpw<Efn4gM7*G`5d`)Y@n_xhc6jChJlh?0H5He
zXtaS}ONcqM?-KDTK8gF-@LXtJx2?}Il#o^*T&sa{>+B1pH&HUkD|;(*UXZdSwvWu`
zD-|`&sUMe#?`M#D#Rl)X@4I*&X)wHQkwYqVfaL`0DDnaMh8^F3VzX6JWIjnW420uP
zqj!*gl%j_INoRk)x@WUHK8}0X@Cs28Mu&-yVhf?6@I}AK4yY$5{_ai9K%g{DIKV-A
z7o{|`jiK%X8>!nnp(*v+ayNZQuO2xH(m66(d2&-$48VV#pm;L!g#tUVZBozR_0K39
zOo%^ANkLQjNKGZDTm|v`Zh96+`tcPac7r`z-L8~`j~jqzk_Lj>>Fy5FmH(}qWTOB4
zNWK9tUCmA6Bt7Y*l(+IDElSd37^alGm8&JMxJi=+;O|g_a#(;k6kh+jKoU=-;vYR=
zkowOC(K4{%`x*a?<A3x9-s$pph@(L+Evc@XItQZ5R{Q72|7eg81vLMz>{eI*vp4=n
z@tqZ36+nFC&qx|1P~-d1ZW;=EQPMXb_)h^WGu~6G)r=Cw_SHm?`U9i#h?apcrMRg`
za9;~0FnJ~dbwJ&F)S<tRGls`KUa}J*eY0QMX!uJ4a4<#jb|?H%JL==0h+)rJU5=3k
z`zYnZT`X@<qRyZHHxFB!>i<^mQTo4m*pa*6$!&T{G?*s7{1FD_UeFnZ`ruNcK1#h}
zprDsnd?kFlmzU1v*56(}MyA29>K@WJ%G5r4j$OJ{^>CNlaKa;6JLV{og2Gq~LNBV^
zw-j1o-(7%p^F7zxbc$rQtO3l?t1u4)B`3CKvNSbtw5pD_XNLKSp54+|GM^7HbbDt8
zZ2xGU5nzZVKv#oH2ZpC)pgRH408z*_HGs}eUHqdT{xb0R-Dhn{%H;EvGAAd;v0kt`
z)p`6|FEb`mz%WZAT{8`+TuT5S%`^T;+bQ(Vd*9PB5cc;5if+v$28#LQ^mh+-q?^_}
z<JK?9WBxJLQ`)hTx0HRO3Sa6nDLwls`LD245OYVh)n-*6argPudrQ*>{$aT&Aj0`p
zWM>j85z|N#IhoH@CN_M{V4ImO@exUnD}ihWL$i<Lu3>qO^N+`zZvlcL^i_C82Zr1r
zX&M>DI~X~d-e^H%;)^Iavw_R`c46bMh8P?+_X_8oE1$hduVC8+Ca~p>%7MK^1$=SR
zC|<De8`}ABoo7m<B1hI~2h!~IF$Hx#t`=iX)Jg_>AE;SzqtGp|rYWXuC0SH*%Dv2>
zHQgacNb+I3<O7nd2{OiB;k<N;|IaW_gLUkmL+q11H4wqb$9pbYWN|>czZK|Nrli@p
zC}w(l5^c5km1=YE%Dd49#yFK6GT77EdGXI9?4g(aIMO4mzbape<ZVVcZJu9=;MTwc
z?O7Rvd_ZN<MEczC))tzsx5t0FiXHc$oVTiwaH){!)=@eU#2+cK&Ez;~ctV_EDq0jC
z-$@Owh|4IUu@~pw85&kA`@J(xsw+3rawD;pd(^Y}{fTT@UG=egA-9S2>;2K^a%G9i
z<s*%#Msy$x7`HGgV;U!G<|be&$+YPzumI#O8cGF#-G^f(r|C?S*lXBmS`%bB2P{Tj
zs{^d!SwM$03=p?Ea3TxY{kUk;U8Dj946T5)V&cTz3As%YYCxHL)_F|3EmKz{?C}fT
zofJByu)*zFe0to9M*Ui%-xVX77j0<F1H27yY2N$%>3Sxjr}?Y0Y!9e-@ttL^xxIjl
zSKV|aq@}#Q`}>E%aL%26>B%9$o11}s<SdjO2!tFwcjpyz<JB19YXTLkISWx#BKjqc
z&5OSWG_c09P7~ca4!`P~5p;MCNJSiU_~L;#X0&J^s1?d1&S*bR`^auI=lC$+;I$=7
zXWE`8T5t4ob5>R7Vy?e%L%%4<uHQjy96OkHE(-OMi)23EX|+?&c&)GMac19aI0VZQ
zE>Zjvz>_!>8s_k{=VW_k%n@MpnKW1_Q36c0tVfKg<)REKy#wYrpN^sC>uQQuvQL;?
z7j_;I)84jy=a=?OT@5G>`ObcN+LtOdCG6A<BhgoJibhdEK153zvKPE;c=n7k*c~l$
zIw#=0dc|yTw?F$RkXaX_0V>BS4~45KHqVZ<;;dlXd@{$i=(`);K+ZF^GNv6dUfv0&
z8X#+1fK6p#F8uZ^H*Y$EknEHImkS(Yz&6<gw<H14i>pttpIqJ)t+EnlnU#`w>{OQc
z8B(Bh1X^(yUj!mKt;rS{jb<WfH@vI~<8<FGfSY{=1WTE&GrwF*9TLZZNCywa18*`i
zc%3GD_0O0$;zfY(NICTka3IW0^u#E0n)Ic<R|<4P`-#*gi9)9p(rfgHv+<-GyeCoQ
z7Y)QwY(96TQKDI93pxpdFXP1ur_6^MV>_!c)J_z$>`8Z0@vRC+Jr9XRyP++zsiE>2
zF(p1^VCyMwVGnRqQ18b{i6sP~lHVFulrPBg@Pz2SzO?K1LeMxKi!td8<xWqNNmZL2
zjiONkfpcUS-*bTf@7Z?yZH6;a&%x}xFI%ohkyGy8myJ>Gv`eF+V;W3z82-&u`X%6K
zp>?O~6J~+Natts7^=e@=02C8DbF=>3*rG;bkGPV=QW7w531{k;YI4Rp?oHpPN=_K@
zb?ooSOH0|;n{WP$IH^DINTu$>99}?d)+1`L%vuP^2k=)h$J%{5$gP!i_!9O0%ibWY
zAGJkK(o(|guGIRHgk=xHV(0utPI?lDlz8mdm#62r2_vS2+S39=+Zv$gt(-D!J#vsJ
zAZZ$9r4(hogHD2Tj;Afpzt~B%stSlJJtRa=G^K1_NLv?nCB!M-?TC9p(nNoVTiD&Y
zpKQv<TZsbS`kUtTs~*snS9I0cL~|{Vg^L21y{n1$7{Ns}3+~XcI~d7ak}eq_y7aS%
z9vZZG%?JjSTP4vq=-dzLs|^0S&{Mlss`X2R5lT;*7n`plqu@eXcTk-E5}6Mh?N~2P
zm(^<(3&1A;q}&!@lHUSH#2dxfw=A|rNBz}|>n+&fjc0ZI_wRh*hUStQ3MER&6zRc3
z-s7#HD1+1)FftK5TJ6AxmI55_FO)gYj~(3D55imBJ0yAm1mM2*LYyd`#}Eo1Pprm)
zTB4!#U-(>AIj)MwCY?$dmjMMXxKdR~920qW_&LvzeY-yyMGX^_sfe6~(-0csB@LmX
zOd0CG5VOAqG-#Gk(J>$IGWJyktnXZqVdorFTeXaJ=Hg6roN?oWj|-v<+!WwYgrZ1(
z{kO1GNCkku1Zu;|Sc6$54d|G7zW~8r=8hwGrEu;Gnn?5^ZW9+J4Fi|NAo(%)P24}j
zz;Fjz<t@Ng1tZ`^W#F~uBv1q#Tu-5JDFJM!DwF`UL0MV?&{JChlNq5kB^Uy{6ktY^
zg)n7S8fDcGE!67AY>=7~=XM!Nuv({o);R&Gy=%fXAj2#B@@e@V5K)^oxdK*>GuI(Q
znA0%9JXuBW>HK*Y6a3_0So?KGWy~-CYgH=gA=r@IKe7$1%B4zLHXuU_MQXRyGqeS7
z8$d}#Fmh}I#xrP5$5zORm(s1@kGX7=nO`pzIHw4}fUgPB6E0R1irV6!WMX9PUSGL~
z!7UTR`eiseB1a}x%fM9@%B^Y-T;bRoLpJAV>`;qOhw)0|sVgImR}iR#{YzI`3TjFz
zqpkuQG%!51<(ex!J$(MO`-j#PB`qLG7XrrC+vbY435`h#VW!pC48-|yzPxk4P^S9*
zOCV2^gG#6JL&Db2dxk_O%U*wc!fXX8%gzKvP0h$4G$EBtUhtwM*!O{yn^Vh)af(yz
zfqO`^N)deeNZz49D)8yt*_LhG5Zg6~(zd;p3m(HF&JC$)`_&jbib~-g9@T;a3sje;
ze}d7kKhuN<uCI(M0yumZE-be*i<K@o<6&bsms%ch5r#L$KT5d-a!<2UdX%E7#EH`M
z5hMm+4A!6yMi|!Dpbo55W7_J!e|*G!69MD(d7{&KBJhYfx^F-W$~M||iFAMOfg;yp
zC@0duxjFR!J<K5yTRrD^gbWXS(f4uMoM^N!T7us++R@C)s^2K@m#RS(bTOJ&oA%)a
z@!0*ZX5@*M=rj^HS22Z;z*@tY0x8~OF4vZKfa2m&pue9AIH>!@#4!6CIDe$X5Oc>K
zC<4nKb$aCuF(SM7beP8OK{$R$(Aby1cyKRLl}G3*#K3VBui%7>y@cZhrWWol9|IVX
zfW1@HtQ2cc=YmYmx~Om;6(vrtdNJa<&nFOxS%-g`*;BJAYy0r&7NmNt%=bp6C1wH-
zvcVqH^e~h0H!-#?(5hjNwc2uZvkk1RZ0NXuWd5*XEN9$ZY2eK(ZO_sl?V0IypEO&r
zFl@>}i)r$9(h5pKg~;Iw?lI%!r8#Horrr}><UrCsuUCbFiMhni^m{5bMeReSV=&Rn
z{Ievd6)>7}(#+6KP*W0bOHXjPm23PN4BYg3K(fvS)6u5#R}otA*x4$fcYCujX`Msk
znK~zL$xZNndGgQ)tZcj6^7-q8k;OCxL!!0w)dF#sSvA$~kzzBYaK>AGd3E7vj1wWn
z>Rn$DN;GGOacS<@6?3Lpkj>m^*xNl|$Tg<Ln2CO(elN%<W6o4~^I;apL;4hB_wlGV
z%bt+b5?@Gvz9LH(1vBb>e(zQgmk4)oaqBE^2AkCR-fj31ph|?g$~VXw0<>wlHGVx@
z{}e=537+B?|8)tK0$>O#j*pNcuEP|~7(>*dUl#=s2V}?$rr2ft1u5m>2{G&<-(@di
zC(rpn?D<wM_(-v5ZMSzSc-9VIFg5z_H-m_7st=}@=aUvzq8+EAHXZ-QTI+_c)(m$p
zjcyh#+&;;erYy6(r0eww*Z&oQQykvLn$ZiL#7tCL2I@?(5VxY@9v}0Tx<N$>um)Xq
z!CcP*!xa~1)DoYX0BoBLUGMA$L*<y}f9tU__#hl#?(}dfLEMo4T55{JP-)A-DAv0l
zf((;bndH?bLp%fS$?DT*MFfeDR0>ttK3kvD8ky^DX^&ssfh$U%T{CY9SbDTz<I_kc
zk`>h>RIx%|la-IXy4bX><NBS`!z{%D4-{~<jQp!3h04qG`$oJ(KZ;-sCVIt1ld(}3
zCnU3?9TyHSa4oAPYfVO84+{Tu*12i%Wcj*7Z~nAzGGU=2qHQW5=dsHrdZHxR7%agm
zC<{o2$&vZIr@mMf4U4flifEZG9rM^^ATu?8gof~21!DZNItW`+U?Fn_jzh_W;Bk=g
zOS0LUN?1IoL#c6ufMG9B;7i@20Xgy530C=tp3%8w@s?(@B!}ylQw$H&NN;Zo^c6lP
zEYDIN_DhTdrK0!Nt{gw?l|)~CMZoEIpI5!}$eLWzhzI>-X;qzSz5~07)f$9O<DvSZ
zJ;CSi*tGhz2yJ8E=x+Uq^6PAF^$Mj6mxzd)WaukfUOl##5LhCWb7v%Lf&E>OpRxm|
zyzqLh`DK|N!^dX{BSbI0nWVUrN|vyDk`gq7;kDFR{0njS0${Go5t|G!Sb)<L-hzkh
z>{x^2e+&+&S9p3L)Q5r+5q0-ZA{9>pWjwH!F62Qt(02HVdg5EL$|=?-ET#_b@NIO_
zxe=q#(s$_!W-ibdePx3~e9dr2*1>$8Z{;c_edAI!nEV*_wsKYOU*!e1qDBUe9=gi-
z)^YVE<LKGm=bw8Ds?65uLZwFfbQKC%h8tFnT~N*I<d5CaUL-gd(9OKdX>3SM@AK@)
znNS{^!fO$^$Gi-cQ`MUNWNbJtIk#xvak6~<`P?rt{vNSTRD`{#r?uRda$G2bRr`ab
zCXqt~u`b~pnUp}58He#CDz#$^vCEG>>&2H}Snj40`z=fu`M^`{K~jp47(_-?L>VO}
z1c|?aGT+`-h)E6&s1@`2Q#ALWo3ZgwI@oL8ka=fX9JjIrzNS#Q`hD1r4uuVoD{l3%
z`}35Cb`)+ivotSMtBzevcZk4SX3)WL(Chb1dUH0`^17M_EX#Q}q^>VDeO5^`o0w$9
zy&pI2WcQdgJ$^Pv_}lo-<?kvMYaK1uloa|r#+>Qr&M!W1>1;^qN-6z)Ht=vmj&jNc
z7K<*o!Y2)=nnPYsO^-H^Enq$d8_GkK-@61w)()k%l9t4Sp?yk=v9mSt`qTpyQCVkt
zvSKpiv(<C4qnJ3lK*dN;ofiu)`S4s?=ci}vv+xZ(J?t~Jokx238r&uJUi{cq=7zQv
zQ7II}w?6I&Ss_LcQs2EnM7Ov7Q6n+e9R8xp*yeLdu~0XUhBI_>gAGw7YUYhE&s%mE
zVV4&sF+ei<VNrd9p9Pk%NE+Di(QG(I>haza;SoQd`5n%LRLxrtH}TjqLetv<!M#%-
zyX@(lGBEw=?4nv@2SWYO$#tx&zWEzNK_`nVe~G%Nb76#U#)#IQRdO3_`OH`_^HZ{W
zcVyNCwUz-afr@KK7d7@R<Zc{xW=s7o9UG%}ZqS)g|6U3{<-Vhz=p|=Ax(C*B4Q*bv
zl6WrHjI%9m4AJ9+?$wVy`!YX*_s2?lx%irPNQ^mi;QBwETInwvvQAK(Ti)Smo9}bP
zxzxeF;9ZvTG!t|5Q^(T#KWsoeT0vXlVG}_zN_1dNA=7f7_CyoJ!a!nc>zt~Vd-GbX
zqiWJ+s8I1piQqZbMG5Td&u}K3pX3~LDk-PC($waM=9j;Y`<}m{Yb+`>P3!b&o!evn
zCC`B|%XaNl6WK6`l7umhIDb1u1G)>mMK9(;=%go}t`5FkS3MIY^UXF&l277pO5?H8
zZaWMR-k@Fm&`h*#rM)OC`+TnqS(#Yu(`0vjJ98e}dymt3)}GKMF;$EQ>@8iN7WFhM
zy=QADmpP{0IfS{meD5=ASMK||=2-MVH%(1)7JraX7jCveZ$!aGj2Ck=K{5GT)}7og
z4M(5c+~17{cyV*OPg#K%mR?`pS$fsU34BwwSLi{0q9Y=QqykMPyWmAw*3<jkN}Ykd
z4XG-xt2VUOpnN|I&e`G+#bfW+HV<fi)VuKvc2~FyIRoKs`0R6r*C$Vt^8|_6CGsr(
zh#qeofgs3j!8!h=eM7Jtx?KsP)ChiGi*@hW_nFi8Uw2y)KAawT)QwF|&ZR%xoU@G1
z*OPDQ_^HgasC~<n%J;RXX|G*Vy#T(rSw-(oSHGweZ_&XbJ#No%_1-nB8`U2!7rn2c
z@<ThntbNbMGjBa-pD5~*A*^&3k0l$5$CO~a)|`(wh}EB{Q0BaTvU9n#4@Jc@;LPyU
z{$uiM!<ii8`WVlUBNb?-k5w3O=YJO1WE8I8$)}u=kW`R%QAG_#-A>1wDzVhDHj-}?
zmH*`7dEw*?{S^;bFYj{7NSzAw33WTq<al=Vw#A%QOLkwtu4)!So<l;zWXzSxol))l
zb{Fuuq57wqddJ@HUTB)n=j(c>TH!R9OXy4p5-l*xci|{?6F*;}XC~yjUe}g#KJ`ry
zjB-MwldW6`3ioZ4(AGWR{7xG?bYB`3vk@|ROg6-Z`5B{Q0ei#wV8cv%*(mefPSv0E
zumibzxM}n58Rue*gZ_7CD%>!|8=2A3!PVu6Io8Z=d5wOe&Bl0V-iC5tGphIj3z!gc
zPu)rOf(^kkdg(VO1bEy73DTaOK#l$ic%Jz>lwLHsyF7uURM;JH)UooJ9{E>$ZRX9m
zMU7_zP$tWnY*P=cj>&~^lnz|>vtVLy@|=<C|9(yKvXUN;j<UTDeVSkTHMKGS&&+SQ
zS7T-iM5E0N%Sx&k`6OD$jj^wOK3&-@CU~8))A@(?Nn`S02+_s~YwBybF7nxA(a2%v
z?K($)eae!;GCE9X@NXzZ>n^r_8*jl%)k!=xDD&-#9i$c7n3<DL7`&Bxzee-Gw{lCf
z{!l;p<~v*|k3BkToe6=s!OHk=Sgd6CUYbV|c3zU78(acbhLrR_Gb+7~Qxvo8en48&
zFF~ee%9A+ykRlVk4iRE()x=xng!{;T8uNs~1FThM2YF9rtymgDTzkQkT<&p8@xuPh
zV<!DUR46MwRP4)!AkK*M&3@>tJ5vb_L83a9j<<4^5?D2A+Y0SGKD8)Xe0Tm@#Ku*4
z({MH?Rz^6e*8GUxY;nN~Aqwn3w`p<gc_w*Tmt8vErUTg~rsrMuems4oOC0~%W3Kjo
z>fJ+gjz3fej*tSulk$*AVLR%H1eU8D%E}3YZgT{SclW7$(Pe^hQgE&vPU*Q8#6P+p
zFm7m1hA!qeV6YOYo~_+g%%i8Xuoekyd#&@s`5+@+(aA7wdCjTV(9H17a?AMVn#`X&
zof7l!BzCkp_(jxd+Q)OVYJF*?UoZBPtLmOeIXE$3=f$F5<o;SqO^H)NWMV7hy$VUH
zyD~(@I!WOZz{q0>)bntNUTIkgno~3Nhpu=m`vF_klDue1&DX!aWPfaW#H!VR>bH+q
zC_JX-*Yh;2<DB(ED2%bd{f}K>qe!VJaQ{-C5IUE!5(9z#K9zYEt@v^eci7;j)Kz%9
zV;7%o$D4)Qc#8@>bxNhzb&kxCJ}bd60BVk`AlRP6-S6~7E<HuYn}LjcC20-P@nxZr
z{7z*BlV6jrTc$|fdVYDd5}QGG)$b?=8>DO~r-~RimmlPm(~OWihY|^LN(%l{zyS|V
zGQi_Yi|Q-&G}Uj|Dv!94LQTCBd0ILpGAph2Wjkw;l-1(x%fVaGERw#zMuZn%`BU0Y
z;Lof6%#(}hOjEnqKGMxM=h))vbby6ujo-n7kY;{a&<z=CpR<hF(P%)f@UV8tgIwv_
z#Bh7b-46L6{bZ=G`I!w1xbTwM&3<`H6(MOt+<N^Hq}}kMM8gW${1o<fqWIbqgSSch
z3N*M9Q<`l`kff@<)<ZiYHW6|Vr1JtBR8K)p`D6b9iF-vrSLpJ!2WtOYg*x<J10s=j
z^~Gtsc~5hlsGnqHwB`yIFQ`x1G)Zoh*_JQe#FSqKVnC0PFvS+FSLRJ|gyz?e;J}Z!
z1?GHh>u@Z4JCbZiaxT|z-z4Viq;jC4&-fZ=U}!Qou{k(CWfXGADxEo@8{ZpiYg_s@
z)?IMGZCwI67$Ml^V)#kStag9CO_wGd@|m7-m!I$Oe@p29FW!-u&<JizquBoE7<Rm`
z!VuW%+h9*zCMXcH8B5`AH^7wpw&y%>owRMvZnsjmG0Bt)iod{#<S{?27O`w;0^l|M
zyDpIVDANrx5)K5Q;tz_u<$10GPnul@Bah}V98WrTKMt1h!*C?fLXeui+emU;|8qf;
zyn9Rbv9N^$G8Qjg){yiS?Dnf8q#r&+xCfh}2rbD^Iu?9s_LU7?eR)m=7}5i@^a>#b
za0pnxT?F`wC<aO}?bS+8+|cjz-xtX#ipLg>6cNF5a!TX<Z1~4}0_s4XXQ*JJYi8-C
zp`8e+dd2nzUyU<i!(EPK1sn_p2h5xh_`U?o2qZ6|k*fTT?iO$5c}O!Mx%agJ){QSn
zo`Fgdb{BSENN5B`cnq9^<GxlOEp!Dk$Ip;t2a<1sG+#U99nyhSSPza8Z82P*j;jAa
zUy}i|_L*O!Su`X8Ccm9=AeyUZCjgJ-wTJ_AYr(~^%gM4m<Ar|N*ZYigw=?+kij<L4
zeViwIGl5xK1%-B;B8+M0W+wwS5iN2%?$Fs=At(ZzlB}I?)iIxC0oy~@=P+ItBQ43D
z35~G%F$oU+xNIKh`bG;gv^Wsl0r|Kx@1aOt+vdTVFr0hln<m&4ibT0d$*2e1+Mp#=
zEN0V;EVUW!uBaqilO=)<2j+|e$y7wZWJtpgbyBm1*h4G?Nw@eh>6&?`x3D?w)<k!4
z?;c)J3bTg$D7qVC<^^G&q@!S8+a}-yATCbX9NB~@wa*4r7gOk79b_dNY$)qB4=F$b
zvLB^<9(FCxEa!RG5SH{{muT-qENs+8|5V(`CIL3%!%!rbOD1oH`q~>!%_^Qng3g-^
zt{I}?VCw-4x5Y;G06hiuaSb-ifEH8xOE}E4SDQBpD>W2&rg19hI&yAB7tZ8kI2?lN
z4loDM-|A$(yak&VSISJ(&&!~?dU63Rf^%r)0RU8q;11n*a@(+HdDL)&&^Ctc^I9I^
z6zE3IZw<42U$wgLRa}r)<b~14Zs-fTf}cpU_Y}<D`^C0_hsY`K?fr~VwuiJij^Y!%
zawh0{K0pzeiuDp?v-0FfbF9dD7Xs+`V1Zh70GsX}GYE3X5qxM4u)0y$mOM?E)Am5o
z1zJA}@vszkqpVNiYQ44>fuAmpops+2rqTMsf!##Yc#s?PD1ojn5A<BvXB7~cXF>qv
zKqjCj{Ry&J(E<Q`<lG7^%3_o1pkCWUbY@w&`Rh*O-!!nkXvA_rl0F&mXRAtYa)WcG
zKA~-R3pjWQoaav}(So`mNMlEM*y6z4VXyh8w;(A?i`@NO2dU>ARo^$t5OD>dj!sY<
zj(kZ2Sr|ofItwtDL#RhN$|uZOge=3Ev%hL(@n&0e6=X73YUc{*bz-@p)3Ooco^<{h
zzZ4CsF>o-7>*N(}&D<1NTYW2bNFlUmj`moII*s*0veebdEwLehL-)7o^(ji2Ph~<g
z4j%yDPzx*HirSDARqrmu$TLuAstrNH7um_{_+gLS-F#<j&T9ZeXk=((rDwxel9?9k
zVU=^@a`;r-4Z}}~RBF&C@P(3>u8EWK-J4G}1|RiU_OTVoe<wjcjg}_zLKOJ;2TWmU
z#AI#AFPI$`<x)ElyhMuaE`q4^MlYQ={x{dh77~xF({~WZOoC2XsoMODw-e-Fy!j4P
z5#e99N_Y^tf!M>%%veYqJV>F*N#Ro}jQdWrxUHgxe+vNaF@Z8Ry&@+BxuMCg^F9pN
zjNPQR0~%sl$$omXOu%P$4><ni3|a0=(&Dk|sWVrJ2IxHkZ2iHxWD)nyF11S;4%Vgl
zVl1dmyy!HP<8W?c3gz1DYf>;spv9c;nfP8v)abq(B05WCeLYEv8YRGwR?F3UJIVf+
zu5E*PdfI5u3kpO4Qf}1XY{lD;YP^P)y&ZHVt-1Okr1)zg<B@dynOl`55SCUl1@;q*
zKtjktNC&h1uWsUrY~r8{P|_3<paM^1B#7-J`J}(~M^g$gwh|Y%XR`oeb*K5jY0|Vo
zD#gg3eBQq~wyFZqdpza>w;cShPMER&pNU9<R$Dw4>IoLGawW9Ul|?!!>HUmm4}`5r
zHh_$R90^mVeh4}eZzpLc5^hmrV0!$8>Ne9`#6MErU^ytt?(PH}^Pd|K`}SW)h!i(%
z-o;=93llPdBQiw9ZZn`G{_&ZD8jF9jvH#pfsSRUU`K0;_sc_@(U2J6_)W11d(ok;R
k`v1E`oKpXHjM4^uvg|Wzhv~U&GWe$;t9UU>+UWlO0Byu12><{9

literal 0
HcmV?d00001

diff --git a/docs/assets/design/fused_moe_modular_kernel/prepare_and_finalize_blocks.png b/docs/assets/design/fused_moe_modular_kernel/prepare_and_finalize_blocks.png
new file mode 100644
index 0000000000000000000000000000000000000000..94364e593fe68cdf46af8412a2afc7ba6eb33aea
GIT binary patch
literal 130810
zcmeEv1zeO__dg;kSd@rjfB~p<mxPj1BGM@_L(IS+jg%-Vp(0ABqJYwkv_UB)AVY(S
zfG~vQNdND{%qY6-zPtb3ckB1=$3^CO?sMbZbH3+$&pFTNtD-EsV=LWOA|j$4XXK<*
ziHJ7V5D}3Wk!}W8Xc}ps5D~%m+e>TNTRFomkT4=f9!cD9jNF{&C_8&b9w|m{ZX+8T
zc2f%@Gg~7oJ9cZBJ-7t!8=1kZ@ePzvmn@J-BSvl+K6XxU>zI;}iG{U2%FdjT=QQ|y
z#@Zf+1b>6e;OE)%;D;9YpNo^7ix1br){&9t6c@K3JLhq5Ll$O&K!M(%Cg2YjxFlm?
z3xlJa!Nt{9xG6ZoZ0#&i*0@<h53>v4ZrGU^Az`@7;9+RC#wc4;m@U2~m?JkMk2E9a
zDR2k+!y}1bANXKxWQBiJ1uT&b|Gb4M?pdufW^7z%q%E|irR0pH&zqc)HP!yHH$tCQ
z&a$>fHs(qw)790RI^$==&BKeEl&K5uihz(1?t+;uVI2Ih(1r6Bmk4WFoj5nAnS+HX
z%nsih?n!$T3Tba)^Zi8=l(jX?1iwl69*t~mQBL1CgQJl6)#2ONfQ5h8J9OdvPh<*R
z)wD3R2fGZZi69?-deC(_n1z`+VPvq2xLH~m5gOw!*qIxdqMUHguU=ZaD>yk4q-l#n
zf&SKP+S=QMMpn*}FeIdE1Ucfz0ebV>pZzKg+pn7bd%82XG)4%T$go+T5;%`gmvXQ_
zr^d&|gA<IS5z+xKa@++wdl!NR+oBw-O<~XuaRFm-GPkgYowqSEftou3{Q}p_?X8gb
z+rW&Gk|-n!r|LZXrh)?eyr2mjWo^IaI(+p%aNW+{7KMPVxx;gckCz7;2tP5Htv!sO
zxVTF;M&IPIDxzQBly6%j3qx7K>}_4Z_TYQu6~qe^ZwOpm+<dq}oWAK5KNr4{`C6SK
zsFV?2m(9L*4w?I}*>BJWuP(p500<L*(5iK}5{wHC{lBgJ9u5A}PIB|&xA*U}lX%1b
zzq*qG-y0<_!PM65Bp;#8TASn{80ufLlc#w!l@aC+%5wHHhCI$%=gvDiqX^!64VuDC
zfh)s<E6Ud19A$>GHbP2&yL$Q?-U4E9`db@C6bgXtDhOM`?Co9fe%Z*u9tGO1!C>Gm
zY+bZKQ%-h>-mHGqhCUKLNI~%T?Gprk(5Ih)S8h%_l!NUm-h&qFE)x$u_C~g5fS#?|
z-@5JayaSQ}uGRl9e%r#3M)nqtYkL8V@UMX4b=SEb6!Rm2WKNq}I5Kkcn?b*VzgI6o
zt`Xq&Hyz=>=m1fTRiDXbx5~aCqR7Q(1NIGi41b@{k(7!dxR2`$I2Bxf-;V(<uQT#G
zmwxwTz(kC!fS#VVHnxNQIws>_2QyVdNrM$wSlc*2OFd-*_73bH%GwUl?cX$U$isb;
z6gS{+HjokkPGE#Y!jLHFkN`%tf!SIBU%$$@zq_aM?Z$tmGR`mp`Gp9{kJQG9lM@by
zuK#LkBf!ZiDG8qVSwjN{itpLZFEfK*qBa5qw}z)SU)|gq6y*|HkJ&)ILSp{=m=m6)
zuG#m0C5qzF@js#{{*d{vM^U^yzN0~Z5M-WnFtWC{xU`BvKZ`&=V9ob_`8S?}IE{q3
z1bz!xa7zjB1LE^1fd%2{j0X!|0+U(`7VA;2uP5uj4_M$2aO-Hzf5YA3d@qPN8JgNK
za!Uf(RQR1P4LAVE`LE$~Mo0@YD9B_2m>lHMPD5_O0)#_O;cr-3n3_VT^>sbk_iFoR
zxE?`*rr@=Shwyo=Hu0?EdI;D45@nJ<Cu1n%a!E}=N1FePBA>P7c{OK3c;w&idGH=(
zt>;-+9QcFjzuxoU<*^Qi{BC}H3W?+f{{x_qvaqu;vNtjRpa0<&;K!pj9x?a`ak;gK
zK_IkikNkvoe+go&d-M~;A?!M;^WSnm7lAJQ;C~=LL{KYZTkzX=2hP9zj0ibRLtKvJ
zyQ#CYv;S7-iQ|Loh<4p&f7k8@diO8zyFaivK7v18YefW$S!+cE%li9R5k4RFCt1-c
zoC$s7Z+~<9S!+DFN&nAse#ofeLL)!@2_;j0ng{xtI>h@Qeu9;(wH%NuUX6>awH$(l
z{e3KlaB};rC@VOHd<!^#i$DL?kn^8NF@H^r4W~$u@~EIdWdU%n_za=|-}lT#kRX48
zM&LL#{wc`i0Pp)BQo)45K+84C`peVg0g(0oolabr$Z-9|QMdnNip0f92*%<u=wGBs
z{5atJt03tc3TcBV3KUaYMW}x(Byn-^aIq8W81Tv@M9883^I-{34gVr#f)YXhm;4(S
zCxHqPH0ejmgG&zo105Vh=6*N>{+Vf||7)7`BU2LkCo0mqQBgbt{EHCj&nxo4hql&6
ziPvS0gpK_ZIsC7o)qgxi=KfW|J~fn$0^t3cxavVr{a|B*1lb#?{DBao1Kj<WLQ~(T
zaDH3n(b&jD(3A&wt)Em{f-+!UK2X;A=aeJxa{(H*inj!QOW?a}&)B^9$otweHX(aN
zpe}!ErRBN`&i&VNkU!1h35NWiEgtVm|MeD6kjO6wCRJgk4xnBRU)l7VBhmi`a~I_P
z!Q4Tm06ycp*4{bSiKz+VmFe$e@1P$2FJOweF@I9+{wvBcdGHea&vuKC@%;Pk7B7)C
zcKa<6D~Yl)wy*~F3L32~F8n=q$jK=P>I2tvyPTY$cp19$b9IA1yYGKwb)djc?30U-
z1zKyLgfPP2+aL3+_ZG`fGJ_!Nh*~W_{UKTUwf^*%DA@lBO@Yd;n>!Ks3A5u`x7L(b
zfZ!3<Dia~P@HYbcpD#Q7PC@>lN<?*QP^)a^U=RDMoVbjhJ<100Eg|_;M{)HhKgs8R
zH?tpL?Dr(LZDFAP+{k$KRWJy%Y@qDNYMDLXX+}ONa0Pn(09T{F`ewrSrwnTpfYt9S
zr}3Bm43PV=4()4}Zw<n66Y@1{m6E{n{zgiP=Sge)7lB@WxAlLJQaY`kRbi(8)5%p%
z@CQ0xK?2|r!l{JQ?HZk2w=Dl}sFV1#&Yz@{(%;<o|8Nng<uB)h2|4673FNOi>#sP7
z*THY8$F)ib^#FO2zd10jSE~7g1B6+do`T*whf1iCMs{`<Ccr1HsjvQ?=fj*W>~Sy8
zadPv5hR|niC~wHl#l0G{2cN&C4xtZ_7$N5fwOsRl(eHex4!^M9e>7XOse}TtwR=jq
z_N!m1`!$O9P5pkXmmuIBUiZFMdJ=r?|93j+XNTnfaY#bokuc;xwnT&H=i|@6yH5G6
zy*YT-fSXfV1^iGovPReu2nPsOf6qdHFt^{sH`j}8e{Y_D265ygXvmLBy2g|UO0}LT
z5ftmMk5u!lS4jE8e*G=s@!tk}{CHEvYZN~xFFPTNK+u-|KJu+M>OU6z2;r*l=;!}b
z8{p%g?9qRCcY~ix=+97Xemp-S7}PqW{&Q6us4=9;f1qlA{YnUkr$QgJS1Ik+?}zxt
zXrY^5DecdC+kY88^Yi0RJ9u^C;ua7hge(7h_$)vm5x+3%KUay@3(EgKeEy#j15m*K
zS*I4A|EEb;Zh~I_7nS%|jQY=2;y*VTAY+TN0{e^l^=GfIiGg*(!0TN4&9w;shQr+V
zbw^xZZDP%7j$kHhPji32h$}CCC4X|IAqZ>X0%EId0ephE!!L)z{)@cf&xup<^YgOv
z2z;+Oe_X}t&&R63u>WDue=e4;7pnbj2`g^ipZN+bLTt(y{IdG4vx+SYs+>7xZ7O2{
zzF!Uc5-o1d(@2!bcX6z*%?Z!{H)C7>=v*on=MP+!mjI7zP?U#oZ7qxv&gldO@}~yH
z)~(mZ`}98<MZc0)VBl-dV1y69^#r!|OZnDaCcbD5hZg{`0Vc0s&593A{r)4}uZCs_
z|NhfkkiV;3`<Ddr-KiRfKWms0NZ>%#7^^HBR7P_uDFHcx=|aDQ@2xXba)7?O1NV9L
zTco~zQc{9WH@G|M)@%F04f310*Y|<%^+Mxol2||MasR;NwE*{zRbv7Ki(i8RAOMA%
z%vu!SBGg&@ePCcccl5vWDcCwZaICjezbp60|I?yFZh}`K4EDzd!3BQBsQ&~`wLYW)
zg@tr>9ZdVR89;8X-^i)faHZeOpT0pFbbk07KpO84sXkr^!6AvR0W>bcwY7joI583w
z_phflgctUIk1lZkT3z_Ys=r1c{(Z^--lYELYQxV*u70vnzZxoj?)ar-<YN5weV3n{
zp@0m3UP*rS*#A?_Rb0ZS3IqxG^LN0hRjU3o|2_tu#;^I8NY-+Q)w_g$`vi9b{{Zyw
z!Qg7#qMobj6A>{Gosl}F=4>z$y{VX{WwwfT$CVtpy$pW0mhx#vocYCybh%=>XGWfl
zxea8;q_(j!#qA-Ddu(s=X0o?Ts2;Ybz62X(cUI9YKX&1{Ykh4@ZW$uhZcqR*nv1D(
zsxzm!Ohm#!w1HHDh?wxJ8s*~~T5$d~SMa~?X1c5em-*@S(<Br%ieziMz_l?B7y7<u
z=<?%mqCgm&^v6LZE?0BgeVYSe5DC-u`l$9NvS@R8|KY-qW6EvZP;N_CT)XGc4+ArB
z5c~8O+g#-Qaok5<M8-lp4B$5jt0#<nhZb~Y@KkWmSGnNtG)WL;Ms4Qck@<0OFe_sr
z4!!-`zmq0-%7Wptmh#omy+?i={G&AJik5xn^&j@Yi<d%zHSu}PP0Ak!pFDh7t9~2p
z7Ret(B|%5Z5Yqe(R!REf;4YLBtb^D5uO1@udQ2TA;8be$_S4kkNMCn~&!olTW^9Nz
z83%a@8sNhK7EKa;<qXATiT5PeJ%wJ*TM54?JgXc3Sy_;pn30L|CA{q;`;n0$!|+5#
z86_-EYF<~!7(%wmRQZC(c99j&e=;o^ZF_>gXp21-aYG3Vr0BTf@QJ4-%`@8Nbr4?T
zq=JFCl472-lS<rYAQp(_@8~Ibm9liRep7VA8H!_DynO3qIuokG54bW;<?Jsm*o>3j
zeG0IXrypP3Ny0#(P0WnsdQn-{*wInAVou?6!Yk8Cw(XoieqQRVag0ohY}+QBI3!qk
zwlENRNl~`NV_MOUYEFoO<%g#jl1OB7<~yw25{qWEqFw0J>5XXdTa;P5A54=<@>#lf
zNk}SerpuF;R4VY9TO=yBXyvi@0qOAz)VQV>Hf$*8&fY*wA-8?Q2_xppVBdqJ!5P$}
z2Aj96j!g!3O!*Ga-mN4}%9rKaEMhlI>=6&n=zGtc;*HyOhLG@$Kyb#SDr6#DX^PVl
zkG-SKvD|{I`XxsJcH~ObD;0)2j>Kv&loz)9RDF5keSloti1CNHg5G^E!s51^W*m_!
z3VnVGQW1$OKrg62Dqbh{;-$V%<rq;yADeleOhl~og(_ZuaaMZt3K5v~`^s|iE1(-E
zu8pgc^CAJ-!(1J{eG{<-lLX@kudBG2Ng|b_o7=T`yup;OnynlOwD=ddeCbI*H{-^e
zaeBCB@ruymXFc31l^I9AeA%jiw@@G00Fc5RQfO`7`>4f9K{q`W+bLERpGpeY2ZI^+
zF49NAV0z8NbI!LS<=ZAEY0;-~20&3$3#802O9J-9l{Uec2y}BTa_`Yq(H!*w8g{DQ
z^caO43z%MF%hUv&01$mYN6NMDL_b0XL_f=}SV06tzg2)o?nlvUK+{Ve?BlaOVN_LP
zsI~1owQ7STJE9-vw*hoRV-3HvDw;bCKv7AA?mduTxCN#cEt6h_s#SY&JGLPMk0}66
zcLI3Gge0p~#KBhyHij<^e*f?FxD_;c{(vgGVler{-V8b%CJ`pj2qxcF$gX}JOkRBq
zp7G=4|JuiuNSX}D7Al3{GyB*_CSHk0E-gN3?_&;ATH>4PQALDDPqfGJn4rhu;_jxi
zpSa+CWeunSd+qL#l;`1g2L;=Z@7(92V-p@e@s4If@o3=mf)1Eu)5+J)40ouAEz-T+
zrxtHMl=Tdr%ZD*%?;hn@S?Lta#w>*0nyXA%kbyIona#H9WoP!ndkj-jwK44BEA#?y
zN8)IvB6(Me+mYv)vsy|$dF<G9AOnZP@owgW0#W9E!(JmH5>+LJ(clcmkd-h~$BtfN
z?RyV8=i#!V7^x{L_|+9N$13Y><&zotb!nY$&x0B>tIDj=&Rp3wt^5o9Q(cM8!PIpT
z`8E4jT2Io##zsiyzjQU6Z4Lzbd@p}53(ih%0Xwlc2s=q3N9L8I`J$dFq<@|!pBsaf
zXS(Z_<j>&bc)Zz<^+rY3?Gkfu?IP>F=h!YrE9g4WXJ0*N`{B}^_Ac$cUdu1$;2Z`)
zHSI$p*X7%Wri6~Hx;3wlz?U&sh2A6~escKEtbALGFHGsmR5#tnoi&j&y>ER7%Ch|X
zgY&pVW~H-Tl+|L#;+lCpYpmv{U0cW|+3I41g_^Mkm$oVN*N<9Gi2DKa#~wPe>O;Jy
zseqeo#opWxYzXR_&$qvCb69FlZoB(Jgc98xHUbvPF4`Y@vnI00$F4afd}Zas@oW*N
z2CEaSJ<ms)8pGcr$dI|oOlDhe>6du52W+&%Fe}NPBNI{Tfv4a!nHN~`BbT5F35HFO
zF6pwTond7e)wx!Zv%BD2_D!vrrNs8?09wV{dz*QJvkeOcv1HjVufaQG&G>s$Lzh1-
z*ScWngXXYHB6SDF4r5E3Y>omX`|7FB08XWx<+OJn36(sw%?PR*gK4Lr_NZ|9icax7
zvEenvTiZN25xt^Eimxh3zgV`R9B5u<pI4XbJ#+GgeA}7n6DC(6F!SOCP@VXy^Ck^3
z*&)1~$@(Sfd2~H^vQheh>5*N9rk<F`$W5h_;X6gUXH}A7MBP)`!>uEeE);c@<bAmd
zcUoy4v2$|QxemZKoNRUdL|)F+60B97o?7HyYE*Krj7URPmIL!F`RYMB)U?uLg)(r_
z$&ayhX?g<L(dRSECl3XLrIv)YN4AEGUTk{NT;J>DQoRkHKNj}-c(}tGpy2ET1y=$J
zj#@DiB~d*Ct-ODF-=OJM#GKL#mo6u`_|utJ>H1duJ?iBex|VF&JYu>O7VP#f^thgn
z_Kx^n(r>dn)$3tcBWUCip-V1`!72_5ut@?RQ%i#LYZAqPlk|;>r`(V@LH4MG$}uy#
z$7mC+mt#u`!qTuT$XBh|mKk7Im-a=Rw~%1)13dQJ^j3c0Ia{azq)#wc4OB&1Iat;$
zB{k)e@URI01FgSBExYPkiAdrU!3tpG7xs}bWRL;}!*l$D1en3a&s6+)_hbn%=Iao~
zABHf#fF-w5l}scz&FtNeFu4Ol+07hPo;Lx?mXYt>@SSrvmInQ1+G<+fil=h4)NgpU
z>OC1wfeDk@GD&&?`lnV%<#;}yujwvJ#I41n9XYVU+TJ)Tle?)8sT}W?)U>TqJO(!K
zgvA7JpSJ`tsni3iSugUi)a=mqCsuX*qLs+On&d4Dt7fd`@i}roEUz^cj-|gao0*M`
zqlAwZhAYiCjuemici7caWGg3!DII`0I<|I=lw9)D&PKP;FFZR1-#BhK?LJDrRKt+X
z(pobr>>!$yyRShN5wXKH*^Qk1vA4cv`b9C^>KN$2bQ(Wz)Bq9^qIhi2I1-QM7U`A_
zDsz54l2!^CQ68`*JG00ou8WNuia&Ty>k15TW*nx^auLri6$le`e%yWo8>T-jWs_VV
z?yomGdw%)iyDQrqb=pUw(s{~O#Gc<um$a^&UU``^VGM7UF%w@ds|w$-Y`8n~WGnLR
z1pt%v&>SsWS?coaJB_<b659s@T66Qe>H*EMjuPC8^L7*t00thrk}d#4J3$<r=I#FF
z9vm@}Ct)*hJQwd@7vuCo=tL@0=HhG}JV59WCO?{kr*r=1i}Fdssaj?0@ZQJ42h%jt
zc1_6pz)lzq3NQ=e-6(}R)bXXwMWAD8Qnf6qJgFY_dv%rMJVkBNCM~v<H4D9w+I=ZL
zX}vpbhF&SKnmN{0hHK^t*CBbBF9dThCMuvbUVq-Wg^eNzGm1hF9>tdW?`*K8>_!>z
zFN|kyo7Y8&XV(P?=(@d9eB>14Z;x%WZ>*kNoP#%4g+;f(o)-*yqR>Wjwd2pLCw2Uy
z3@@PJ$Fn!@=%yPzTNfkP7_Z_PBcu`7f4Y2<3?+DdEF$2k+JyVKeXGGC>LG&~X%AiD
zg(^3b*-wq(eDA3ylE*I0PMN`3P?1LbmD0~woC@3XH*z~h4c8x73T~IY!LjF_!MzfB
zk=_B;>;ZO|$7>EaYPw-DsPG(F(Od5!dYrgb4g#r6+6<fk43-j-ierNqysB)hDI)rd
z`{z5g7u+%z+aJQmdrChOG3qdGZLmFFrLQ3%lGk$u+pye}@{qb~DT*}snCNuoO_gAC
zy^eTh7TEkY+KN)UfSW!&&GEJhpX4R-%+1g}zA|LhEa+E0wUS*kn1bcRWy`!U>g)0W
z8ru?#IL4ZiJ?Gm}7OukukVgD(eKq2PW8cpQ5{2B49VN+V%Fyg>AE{2{8S3vX{#c$+
zG+?XT#zL2}WmDnpa@&`cY3ZrE9Y$iu9=tdvP>CFc3+ya$vPf5yUud%4Zb=s@RfIl;
z@hZQjEJZ)*)81k2{xkwDggNvqYh`fR0TaWad*sOF^zbn)K_j<{!tlOn70L9a&oR0@
z?y>ETgAuv<O!wxODjmXG<-37VS-zMi$C1)w<ba;I2-?wra}_Dc32idn#$Nq2PS;9l
z5yOisNpr?Roo|l1(I}-Uv5q_Dw(n+++~LwQg7!4MG>lE6uJR1;&QW5DDAC%7?YTV`
zM$0|Eu&E#oGwPkA8`<T;mu=R-zt6@(hBXGOFJr@TG(`XH>>1i<v<^Jg1npX680o8p
zQ0y$=wKZgoow-K+MY(o<5EJ2dC*AW7@CjyzYnLPXktVHIuUwcGc2?xiOgU_R;yP0&
zht27N3MzONpB6(%?(E#o(B-teeAi={6O?fJuKvoY^oWYd+{iHX!#e=~m;RE`ya|sW
z!~j7kZHSowg5**|2y&I)Q9T38U*t}k%@O;m4dV}!wqSI;Y896(k=mE#2Mguzx)`r$
z>cYo-ho&M`vlm&(OS`mGJ>uHhE0S+2)pt$Y^wQFD-KG*J-QJXQ*@}f(arFanuM>q&
zjiztxY4Cl&W2b={D<+IZ?@*RD`)M~FS6as9<M-y>L#WZT1wk-FRzvaTlv3*CB|5gJ
z2_q+m+ZDHdsq0)iplzM?dERX0lQ@&Qp(0D9me@<Ox!y{%da<EV#jOPf7>swp;hIwe
z-2%3lNBVEIo($)Y?h?&ZWE|-bui3u=x3@mqz~1VeycSI%C*&os`9f?!I$J$TE=Xcw
zxZoy_t#)>l5-W>NSBg5*r6i__A}@tegXhwIp`QX>tiFiA&qjZcN>zx~SeOl#W_~7;
zrqvqgxea?EBA+CuDgR|4%-7Cy{|WhoT`*r|>Blt3XDhNuMioO}yJclJqU5n+(`DhG
z^5VL#rD+3#cr_29lr2b7Ve82pbE7OWQwk9l;7HC?-p3xAa8UWB#_OW60B7-RrsA$Z
z0C<Uu{(kRQV-^x|(1EmHdGkiEk5pj4km5Upbm<(GS~41JFkZP0<8#*0nQ?W?!bb7h
z3al(UTsP$(oQG>ZlW%O9wWj~r+%>OH6JMBN#uVQX<t&=reC1QJ>XY2-Umm>Fn{M@$
zNj)C&F8H&0bA)+3+>E;B#O8#OxTV?ZK=^ofW`<U&8!|yFtUN&~F=EyVy<B(VPW<$s
ztzZ9O)ML-N-n}BT#jiw3Z7S+=7+d?&9*37>0kNrfz}#(Fw4amr1?ynOiueTL4%YKf
zz-7=*X1kXb)nije<c7{SsT19;srQyXiNjZfIInVsR%dxXAxZKx-f^*Us6|gr1LNzS
znQ{UxFQ5@Tc<SuaX*1d-bsF>31yM7L>?2(<^(BrDS8N`2=C{QooE)yD1|_7Z4L0jX
z%pO!Oo^?d{n5~4juV}Z#hw5{f7y`>_rMcBI|4xX#dw#p-8-<}xfZx`wPaWPWQ?L{+
z#w(PB_>7N^s0C*F>mB0s_3*6AYRbyyOZBhMOTTz1u)nOUr<Q8kuMlfr@oM_>{gr{e
zhdcwb<Ms0upJy9(>mV|%KQ+As^L5YYPc&jL(Zv+HPZ%bVZwSs%kPh*{pWgeKFKe-j
z+ogfi{w|68cY`xvX-Q`c2lDjkM30_-rEP`Czt&hE8oB96?E41}sx1i33<Y9sr9p)M
zBPO<5<;5b)TzdD}hTzXFXnA^(2W;nDpHe?(u`al8CapT|6!+$=lE#VIVRq3jR1#C_
z>n7|dV%Ap|!FKA2+{BqW9@#IL^xOm;j8{t#T^c546wR6Z<aGWChXth<+JQ(zBE!+V
zr>gzK!x4rytc%M#(&BQZMuU~##Ez5$_ibY}otZX2E3a#IcxX8Srrxj7Z^b7nwddXJ
zb&pQzuoiT8l_DaC@eX5Bg_K2?q<;V8zWqGXlffOSIanus`^83+r2K+C<0@oDdv*<N
z#FK;_z-yU**}wvPxz4IDM@5wNU~ThD?v?f0q}@JWu4$}%t{tg_u%bDnKEY=7@FOi|
zGC=)Op5K+@uamJamBu-Sn(}CjX8d9OmbwmAPrhhsVlUh;M4)q5I_k0cx0_y)Nk3Qr
zoKjewAvHmQy~9w;R-G<9o7*1PumGs(jonpv;u!~#YpazD2S^y+LDVpBoP<ngRADig
zh121N!mWhh5D67l&3y0;7N7$sz(}h6AL94)9N5#_g*P(Di77Gw#Oy4Tz_=_t<$IC%
zbl1n>xUi=wf+UOf7o|h`7($+S>+dgX-iC+b699_OO^`SOC~kpFdnCh1NA{))`;s?}
zS-e5$5%<D94<ArDavf{hf`397qJI;cLZBxg`iI=V(Mt~sgVD<G--t&%$eXQ(o8;Ia
z#8YN--W)><$Tlvfx!NcYnsKo|CkPt(oMuE8tLPBR7Z|*-atcNTPc-QS8;%3n1klRj
zgQjuHKsFBKM)V}A$_yuhGyI3Hvn|6CP8&_FaEqAj&3;lg&{LEr*CbYFShVERk~OLx
z<%-%y|0FdoM5h_qrW?-JE>g5~GhgiG2>L+X^<owC^n&R8_YdF;Jut_?gK)!>;hjR~
zguzMm+~npwur>;CTJ@78%GBF`dzyFyX^@XZ&<P`2V9-1GoB7o&ISfo^U(6L?-Co~-
zccYyS7r<oR3}a!fJR)hQ#@yAMKBj3#Sh*y%ce_ToFFy@mSr{z}bDvY}UPw>9!NL?)
zx7<9Uhb1~>eG42truNJdjvhcLY0;FjV0@aR6uI(k4$_&mh44i8;`^Z!FKgNL4q**m
z8fy>L+=>Wa;uph;+0xVqTTkq)7!skyI2OD>CO6j}6s{e)T4X15M7RoUig4*OD*V|h
z6M&z;^j#02=yxgPbQnk6$2dDb*<_QC^XZzU-+WU7H_4_P=XjPsCI;^Wj56t-|G~2Y
z3j?t1LH%iMi$+AjW7NyaaQ7SZqLnk5`Pzf(wFafUQ_JaBquT737L;1g56`}oHEk+!
zDlA7{6N6iY*j9PGS552XUx@W*LS4*i=Qj)ILm~M4X|o09&C*JqKZl=<GUcD7&Q@lL
z9Ce6nZ$g-<Ry%antne><d|%r#Thg3zm3moxXKmXP`w6aIL6Ly^sPw#EQF!t=GEXb=
zrf96_aN0OYmd?W1*z@P)w=1^XekH>guFQxDl5b;_RH%YB5iHjaoU%2yi_(JFhV7v{
zIr43qO7)}_3a;U!??^7D>+_sblRn>qZtEWT%v?KAUr%c8+kn=u^;mvO*6CEs?--YV
zW?MGOkjr&IG27kR>4U?8Y@R625_`3Tfb-Gs)cx|}UR<)OSIqV-Ae{zu`$3n;5mnFS
zu6VnZSdq$TfoHxWzRHiRozk>*Ly1LS$NQFD)MrvJ4xoOmBfI#r2u{0Q;QUctVpkB*
zVzCaU9P{Xy9a5&K+0EzMw=?a6W;Pn`G3}PFTg!iO^^kqfgBvqOk~#0X=$xHzT^r@p
zUtTMeC@gcaqb5SWt-vipXfqz!p!A9J+&09aJHo_glDtcLmf>5LtJK{b8+&W?bDUC3
z;WxzKvx7l5pQ%r^^03e?r1^hH(iiTKwh$?td6nALVaj~>Dk8+)>k?8l+r)rZ7bcR;
ztiORv`g*=tw{#?jz&_{L_Kko}&1%A@229gZ2WVTG=1s|ZK1cL-r|UC>FlGsephx<)
z)QvgyspzgSWH0G^>(QieI5on1cY7Yl^ZvYvy{@=|Dx9w+(IZIPa4E(Ag2H_k)l|9c
za%^+G1|JP8ru2b!0s0IrW}`IYh;iT}db~2*17&{Lw~f_{h~zZMT@A8@a)G*#-Y(>P
zo1&$|RT(&DEOPI(`<P)okKuUSvzdJnGj`%0_dX$qy|WI#c+XdJDl_HS4CPr^)BW_G
zXTBz(N!>2FH}3@BSI@<;=@&Yf(Q;|z=vXvl9iq)Fb`dvQfWz}gO=nx|wjqlwV*I&R
z0*?z0bjBUaFS|Z}l-b)A8R6@O5Y1{TX-O#9LL@F|#JZDS`1y!{uUWhYOT}WhZr?;8
z2u_?_^bW@Bq#KBIOfgnYfWuNd>Cq=tdEUcivcmT4#3#)viM?m^%X5wH)oQ5ZBT<f1
zi;B{p?nQ90hRj47E)^D5XWc&6;h>O@o>LC3XL~;)>pl~cq?B(SOWEybjp)uqlMfya
z$rlyLPJJ89&OMSY9^zAWDI&XODe6O0!qpeu`8{WsW;f-axzk5ILhLX(!Nh3^4h;<j
zyKHhxq9l4gK2|)cM>!$L3&Q5l725F0sp?n|Z}fym0a6>}D{=RPQF2DeR`VSm9V0tw
zH_S$=1~$xfjHDN^Z|anGrP&eUGcq2R`LbKsh%ep<5k2Lbwns}T5Ox5g88o|qJ(9C;
z{|D8W?v78dlrv(BkOTVC{tG>KBDD-!X{eenSz6h}^CI}jdL>sPwa>a=OP<sR%)3px
zK6ke+`KGr)+-YwwN<UysXqhh=MxRE7Yd5DJhF>%XF_^2N8Tjnm9dY1-mTLupmp?}B
zbyWRDX`AocJOz%3O4gud;eNCF>RtIA9ZSi_CTY(`uoW1?Z)K_8WprDTOHH!Cd@(Vo
z-qq3+0pp9;Kt$V=%!gZ|nTw{a_|(hA`gpR%nF@?`lo|4eN1CFeFKf`vn&*dTOBV-G
zl`gr2^OZLg%1uTn?_bzdI#4+M`C)8>&n4;SvgEJSW;7nnKbPcr@;28OCVi{5t!|{K
zm3yeQMzE;Wk>>eQb>4^?A5wjT_{B_arL4nxARsG!S&kO3FQg!BD*NVm7|8}wQ_`dH
zRC)BGXkmTnfR|SLG+Qn)=g-`e`gkkt#k3c>sW+me?0nvh!V_q6VWVWy5qMSj=zBjW
z3;k?A+Zi#g^SsHi)FmDw%MRHw)~=7S-5Sar+^<n5D&svE?*v~-&8_O#EPwn%x;||7
zNoVC5<Kl4YHmX4$y-T^?3#lRzJ1)lQIgWjbdtI0pG4EV4dMo9MzU7H}n!J83<`m72
zjuA!X%MJ&8U1?aRI@Ln?J?^<0>MG8)6w#bR3uB%_N>y>bjSNQ=ssn&hi5u+S0hB5b
zQmR3JC5)H456jb)tX|oScbbQm*fED&#0w5zSVk*$9JXu<*yf>Wf9ZqIh$yC`=4lY7
zEJ|g$LON+%d(W9b+tlP3F{bl3Yf&P4VZKOlkJC)^2SQ~SN1i%w6~PmwIViKb%A35n
zDIq7wZR9@#OS1{Ii@zP)dqnIdu+nddGOsmOstsAGqxR*B#ly=nbsn#s^sY;O-!+K2
z4mRB60S<?oLQa)&q@lykm{1GA{Z77|G3bN7oQ@OYma~i_@=b#s_?>M6I~y9pQ39CG
zCMxJTVoDpG?GU5dto%X>k9S^xb#1!R#|z>EIV5*g$%=A<K9w|q_@llBxs5x%(GUPd
zg{qJkkTw>kkh5bP5x&s(mK#cRg?rfv;T!D*z*9pSsQ^wy1(e_xh?ga0K^<u?>m9&H
zax8)8P^C`r1X(&16h7Uzms#yHh%d7cMJwQ`%4<2mjbz&NK&-9$7Pw{CSAH%S$}7E|
z92~~4Och8ZG=g&ptjq<PMNhTh17c{X4WIO=@I%Rfp}1cRr~(|c2DhU3G2c#N1yoTp
zL|pR*F5wpxL1bKji!)V6f}y5lD#>UVz))Ky2JhmB;)aAs$sY>B>XuN1xMS}{seX_l
zESeO1MvPzi6R>gxrLbino?=M6AnmVr2rMs+!kHbP96WXez{Xx9yW>F4P-X`rV`?$b
z%Evddu969$scWX=4^8Jq#d8`cRJ_$|FzdfJ9XOZ{{Cc?;7_sjiBNCs7ej5#BGq!yQ
z<g$-Z0kP4N9qkSZ0&NuhLhJE5A9x5*?VBu|B*YZ=src;}b^<vc*ur6iFKxI#26W58
zOpY9EK_HMzksQNN!Xs##REzC*;u-yRAQw|TuR~ycAkDDxA|!r6XR>|xAsis$Dk6Ca
zBF$1D>@7`vMt~90hPxl)?eO9^1=@L}Z|in1i9#rK+r1o6^9dxyY0nxl<Bk?<CqfBL
zgp^_^?jtl2+j2d8?F7YMpfA-=i6t5CSObA5=dbi}U-}MH9*cmfcJFQ4NW!2(0-CTB
zOBEgy0-_1s&qa&hLaFNjcevA@QII}@6smc2<>}2Bz{DqZIpWoO7@RlsoUTuUl9XML
z$o+`|+2x^H07lu`P58=-Z*m=imO4EzrhEhhpreYfrQ+3~5Ewvf^dV8e=N*oMCc?xY
zr?bE&IBIj+QT{|Mmmsl}lm;|c1F<lpjBtrQiM3)m0IBqCBB2siNG!W+^}wD&Yg!`~
z14u0C<9)ogKrF9x+qU9%aIIKmL6fv263QIgAhB%Xp#>}oTGJY_+`DW@CKCP7?Cr%P
zpO>R@Q%=3SbdhJDD}K8vj$H<7M`RuaPDT<8$4Q1;lFg<D8_vGA*@HhOg6%p6rCb|T
znkXR({srJ}RfCxpq0UCa1!zOf%kX6AsO57Mv^?5<<s43IU?QM<A!6lct7)NNP>hkb
z%OwqOqL(G!0y};<c4*`(n6=7jpbu0p;t@D$5|J1~hiC-W=p;01AsDsw7QGq%5Xo>y
z548R`AF>(_dJJk=3N)nl6RM6R<UzmZhn=qjZzLfBY@&7}p;LxCz|6wKp@1Gk1-NtE
zQA`pKP7?P4g29-e+9<DGV7n9B=C<Quln4}=;LD@3z#Sgo8E|`XS%MFHW=ek}D2D-8
z9^C_rjrK5mOIZC|<;%PffRR9bmIGj8M(1wdj@xK}?BGHxB|{>#CLbOkxTumFql8C;
z?t(Q1*$?dlttHCAdX9`S)IxHCM!s(hrcNW-a1Gi;fNI7C-@p+h#?Ao2DV1HIfkw>&
z`QJ5)A;EARJj!KH#{`XfY&RG+Q*Hp=Fg?_)B(7iNSU2i@ct0Nk&5CHtc)(}==00gj
zKf1!3j9Sqy*?nQQAo>}8YrWRc7>WCOBp+BqLUpZ$_Hh}PpMH$iL9lgNCZ~=pPBrAi
z>mppnmnN#{(Q(rL(svv`y?O4~<6>5{I9Zcw)tTpVXl1HFJk@1tphw8ib165qc=EMS
z^h$&0a<hh((Was=AN;KnqaCvbV_fU!XGakMqO)y>2k9?;df?a~if!ZXM4E7vE!rwC
znTg1WmG+0|j(mC;!2UGgoiCeiUXCcXt9h&tla>@})(~UmI+A%qAg1}OcsE+#Nnd2V
zcnbOX-SZfgM0`r|18{}=Wz&s#Ajuu3^eNH`krNqy{O}~*J?67{`8}6v#8wvVI?W&6
zI_Y&haB*S0tn-6Ehcza>taS8g-|e2vOG;Hc_Y1z6Z8xKL{q*3*<l%WRnT{UU@zLlb
z=bYbPqeWj@-a_y8uAR^F?J-POLAF`VHjd(V(TV2S$mz(q=L)Qu-b=HcsF*LF{lS`+
zqh*B~<qw50ejYDfupYl_H&{QjM{M~^pJUg>s`ecMT^FVX>at@*T|1s6#_06NUfJ>t
z)PO-c;<XhzD3hKHg(__*HW6Q>t?^^ua_u7!FpN3YElc02+W}qX=_xW%PMusnX^|KW
z*VMZxP2)&b*i+)#Su|dXph~>8PoTb?vD3OJmG+zgGP6SQ+(6(FS)HZ%8N^A$4_a5g
z1j-(qxV_B}CLO{ku{7JHz|jt-KE6Z;%VfRoce&SdWy!IeQnoEMzelIyeFP#clO?^l
z$zy3o6Y=)(xq74!uFQymf(F>~V`pL!NVnB4OEi^dQPf8XHHT=!_nuu`o~+?$@q-=c
zOg9uS8cK*n)`oGRGfk?ZPTm&_HMnHK5to99va1)Gtrr<@V~Rwoj?OI$$)BHn*g3D+
zsvhK;+oF-p9OE1}>_(;{O7ceVXtmuf*0T}3uj)@uDY`C<=9k!!i+#DkF^owxGPG}$
z<~OU~&+%dT^Gov1woDVN^pe>u1@?lYh@?w-8;<YLePLkHR<+;mX18OfmE)UJTU^Sv
zv`Vu`8<6wFxD4#&=#ScrLk6FHz_hwfUN3<*#7~aF*I3*6_Q^@PqQ2WA#k_DnY5SLf
z{S$pvx&a48k-_R3ndIV&oMuUW%CQ$K_ZpaLH+~G((0Qf6r=gZxm|&0aH6g3E?k;q>
zG34ZHPS#-8AljOs6bYw_^Vm8|#vFA4Of$mFl^sNs-jrOL$wHORRZ^!bMe@&hr$lgs
zEIBmkn4|_-=&xkeuxEEW^|~L*G@#zz{<tr<exU=Ao-5`)-*$P!#@*VHDaXpnr{3S7
zADg@KGb?#wJlDIjoX;$AW5>P$>6?s#>TeV<<}JItf*-4$*U5=XF36di8gwNta=@U0
zA;h{Q%H0P#8^CYf^)xBY_<iY7xdvejD@PKBws^8)(sO0O@T|>(<Co)YhHewrm|xh@
z)adY}ywH{R6?=Di<4NjWqrqA`{QK{lYkKQ#s=7{2KPG(z0SYyiB}oskESMs&W8Jhu
z4x_oPc~8zKb^tM|=;jyWvvEHA!7hB%yK)NJh1aqSPmHynR43V{5?}CpEC1$+>THOv
zwe}{>O*#DC_UCm`B{R00)2>|Dhs^MzRqD3N?=GG?>X<t=2h0mH89szz(iOH-LiV5D
zBw;~t-mj6Wiz;bY@_5^L2{y(lB`r8vOXK5WQgT{W<3m~9MCIsWj5*s3ka^J^Ese5M
zB0*Uta|4mCjqia;6l&}))zYxn4=&6d(``wI**SOTH$59K8W(Gp+)RhVc?N3$)`Y0y
zvBO?R{a|F-G@5#iA$qoFP;8loJ4B+c>yyf)HmVoq=6P<-8!ubwp?*zO)E*&K5Z5l>
zJ*%t2YUn=GV$g4%ddR?Iu75P>yz&HEYVb%1xyT4-`e}y@t@fbf=Zo!!E|_Wu8eE)+
zOuCNB?{@6TJG*>oIHFCC6=U=q0Osg{0ojD?i55roB;GX3UT}H=?&GpV7x4Xk16(5z
z%&15apV?J|?Qu;%U|W4CL2V?vL9EE!SX(_kxGDA6>Zd!ji?7kt%*r{AqaAD?9=Kz>
zTUj0lv+d9ggvDQExuuQpiglZOl~fWrMOm|~GmP!^?B(yWk!;vZ)7k+G)!$83)4{FK
ziJTjHM>UY7cR;ZYWKp+(xO`X+BO1V(F-Pf_IOgJ|yo%kr&cPX&th4r%i6Z)Va9>-z
zE4SP0sXvTNELBP4UYPoc&l|~4zKh?&qI%+iMoM*78IzWJGNa{HCCn{#2c2svIWmG>
zYUT>((C-T00Wno6>|v?Xt;k;_wHGsPKU<*jRx{9a$)o8EvkM~oB_agoztsoYXM^`V
zW!2$_pV8fDYRMhd$Q#gnIg8asbJ2w!{vf}11z=^WLMVIB6TdOc^J{2iGoDmjJOW65
zWK{+&Bw$Lia(z}7Iyz)W>6ry8de6lPbdoyDc&hmFmzSNpho+m)V$oc*9Y?K$OQTZn
zgi0l(C1SRmk6J>Uk&q|ZawI)Tk?mA+byi26K<|k9<B(ff>Pp7iO9OP_Ge=%6JE$`#
zTD<8P*IG$O?4;wZ?z}Dkq|fqF218)_XhzV9o>F&LRCkWwRT!|g6GwM@tUTXBpR<JR
z?e#BK@Lo9nZf=(f%UgjcT%|3=u}#3%jz4aK&H!Fv6h7Hn>`ca1c8_vP`S`3Vjv8F!
zPu10+<cRkj?3~OzOTJ}Sc*vI>;|JI>Q{>T9*-5+7ZjgF#arQFNd~E8lp!uAnPy{?-
zi9_2%nOl>G>wiY$eEPT7wi<eN-xmDfL-(q5emGe>U-os&0S`v2QLC<8M62FemL&I6
znNf8_($(tiI*6<#tGr3>>G7+%gW2_(C!ESWmbMOEvBS%r20Hyb-Nw19-2D)-tyyj}
z37E;#9PTaibm3UUwpdcvKwft*58Rn@u#nd((V<bA*5&ws$Dz!^>8b*}|CI-{OY@3w
zD|EP{palMHQus8b=IBxi-^*ZgoqH)k)}vhduM@Z|x1}>FMFfEvL0lE0WXFLN&rCE4
zK~gjaWK*_NTnjJZ%M#F(SgK-JAoBKD5VWk&zq|oAB;eXXfPTi^e$EPY)DNoO*|)&b
z@Tu#|68iy%?EA!rgvtU{j)EB-_Ik9BK#QqlL8-4`C^rLip#Y3JSnqd)a72)UYVivf
z>&&4Gte_~k)_av?tWLO{?9pk~hA^R*9tW51E$kgyTKs|`pT)EpA}edfyBoGg4tc=q
zVqkWIb+8y{eR4Ej<S~0p_l;%3lPYU3JUn&Z=<0R!ql?stj+}75DUaRRX$tJH?hu{&
zkg?+0Uh$es<Bj;u31+w>mw6Jn=Y7+4iTs~14I>Yq)3<#Ne3_z@oHs^pdp^1Mekq^1
zRxh1^HF5_XZ^ryg_xPCM(y<q(A>w?8iC7@UC3L1}B!hQHc&3c^<yoT`0aOh3VM**v
z`m9-uNb8aqmIKS+nxGUdR#C{&{SrN%`ye%USU7LsHAV~Q;bCs#S{%Q6L@gJj<*{^C
zb0j`LHs9-NPd;O)Rui3o1yU)JFLyVa4ukhSv3q;z!LZa`Sc5Os*FCtp_&Z%BF`1SE
zQx%h+4~ci^Hn?42No#z`-}8ROcG&+agNh|F)!VX_<)XST&9KXL5mwYpxASK%SzQpB
z?s=8hW`j{Ri~%tZAa{m-5@U{#EuJFPF?659O0Ywn=mdv-h^7?ZT+5c%bo|IL+g5Hj
zXfyq(zL?C%IyS(_i^gA9<~d__^IxnrMf5&NUE=R|isZA)J77B?hA!?mzJFEXDA}?3
zlMcgbk$fh3O5wuo{03$`omP>esjc#?j6RQEazgc3618OI8myj^{SvO9hZc}+XC+hB
zF&aGqs>v3snyA9#l^?tGtaRD*ah=^z+ATVP9HHz%os#((WUzx~PEQe%M2xij%4OBO
zM8$C9=0v6ZgEc7^wv1fzNiCY6&&)W0Q#^))B*x4kG^a|jz9PuQM-%8mB^E?;p3+G<
z6R7mw%ZK9g!x|$ypA)j>r@{gSypG>XY<qjWIYqK)r{m{?vd)xdcqe*4jl}p!NR{#)
zlYBAz68GhqM0BAQ0@1uVP)~Ev4%EWkF(6JCUCvwj(#^p?)ew%h&rCpDRm3A}N=a`{
z*ZY{>besJ6iXS$qrb&vl60u&nm(oWSeV2<<24`C|hbXpXx;6CWce<bLfz_}#*ZDKy
zu>8I?$%{r<+-rxvSM805?;o2wd&D#6s-t-A&LLORoSQG{R?W|6hnJjx|Eu%N?Ji3V
z85hr2UOHT$%`>ZWv#$1q3fSsNk{m~u>Z;!M*O+P3RNIdw*SzMa!njPXE5!)5$HSd+
z6qX%=bFI4Bd)WmKg8d<a602FpbIA{`j*cLdXPQ!Vb1lMc1-MrAp@R9HY^d)E+E_MG
znH$e?jyIQ_z7QJa7MzOC*0kUL5|?81Iq4<m*H7EyR>@w{e_FS+Y7(7Ab6RLJn{?wL
z6((YoQbDq!<_B(y$NRkMI`?bj?-VS0Orpl++V<JV5zRGNBpTtwZrN*(Bap8cDTFfJ
znmy+`{ZXQb10qg2z&wtIo;(7Ka*~N5vv{^cVZi+$v&8)<60rqj)S-}?Gbi&AOUn!6
z{EpLFouyc1g9#48X7UpUQq?3Hy4=0>1@<&|{7pR_pQT#lC`9-k-30gt+ZixqLs`@4
z&=n<Qhy3v64BGw4LB|S<rH6=PA-LL+qoh)&Sp)WpcPN)lN=2ePwIgSdWHi3vUX&DH
z-@x$)_$}+Hsu(U<F|6&*9G0G~d_Y*X_W~>K#SA|RA8#$Ok=iAzu18$xRM&hziFa|p
zG)_W%=zU_2uCLbZs_lEYjJ??!;b)4lemDAzI5k!yQWEWCYI>|JGnb~n82TIw?Vi!2
zAfA9+er!XNIhV1Jn1QBuf!&RGxmULXYBF{~2B%G?rpDSkE#c|9&?k^`KJ+&1052_&
z-UsNQf3_^X&a&LL_@o&8tyU+pTg<U*!>%R2(7}w|(6d8iYKp7~*T<3cPwPX5^0rqL
z%a5C-ftPWhy71LEMvR3F*fko{1I2IWm|(>Nt>QoQ3$MPRp<+u+Ls?_E;{Z}N+Ad>e
zr2XFL1Nkvyb|OMi`I&=c#zG!FEqSVvH+JTBF1x2gTC}+Nas>_usEA>-HTM#gv#u(4
zc6Q_;OxSdDSzq1%1QsR71bhnjQbf+VLm_=bjYG`|ogdGfA1P&_&&*5htgxfd!udmK
zMis|Qw>IsC&M24c6!l1rP2{HYLA}Vf&?CsV<bBAFM;81JlQ~xwK9o06nRn*}=UTLO
z2(z_54>2q$%0*^7TX4vY`$ARTIj+<^f}of|U&!h;FXHWe-)@JYjS~k*LRLdc5{;xm
z>a4v}Uq<=gmC#zfQa-Tt;Ir1)?YZOz^KF2siFcY{OY?@4l63&9Pw`lq--LbT8-M6C
z;2!P37LnC=4$Zz@iOE@69Nf@pzA%IAchERiw`quP0x)=Ar;CR-`)J+H-&b{OFRBw+
zTymfZTkT9vf>{Og`}HKRnN|VHb++@BZemx8c6RZO*oi8|ebe);{AQJUMN4QA<kC!Q
zLlL?1V^%M(jaxbbW$BG>MeybD<!e<$@S$|HuRgsr0<_O$FiVZ@CK#*JtTya@s-ee7
zrCBP#s}G`kQdR?9Uc!{eKoV|dk{a0+#Dy+=HjNQD=-Hsdhd_*%VLhTZ*S-p7&X{1@
z*NMtbXonX~JbO6V;}g0G%0RQEq7R4kjn6${Kj8X`KYApICoN#*!X8|;^e)Bwtj2RC
zXM6KltjpR8*;-Q>Bt7?>1P6ZWlpP1BVW$S|RHsR&b?h84S;n%MMnw3hy*H!-8G-7j
zAUIi}_xf%Qak{nHh+sZ#L-slFD^y<IgDW)rs04NWhG#Zch`&=O-#R+q>%_oJ+pUJS
zeW>Cy!wx;uajsWXvlga@MpdVr5<Y3#ckdFap{NP8BijrEF>~e0oG@XBB)+UlM{GYi
zqlv^#;#HuWWKyxsTznO}aWrmUgTuLg*c87qv)jybr@XGaEN%KuJ^F9vqIT&M-#Fm$
z_|_h5NowJHzeYZ)n#17D!TrabE-3PW-F+2KC%*joUUciYv96Ot$PUJ;Wbn2HFugGn
zdG?S1VeEwk6Eq5)U=`Vwx^@5DNOyYMs6e+-Co)?J5{W(8_C1`>Zi`Ip=^T3|wzl`}
z#zGTz7hZonjxK)n!LhRf%jA)Qe>H12ql#+g$uFI!90$}_vJSFL9}5FqMBK=5$3bKL
zLi^)S$(3(1d7XPG({5R!k-Mbaek5YEvHC}#A4dkMw!c1ox!N`>p#m#emd~PGRDUTo
z2cEUL(tMR?-Pa&yq^TJ+bV}H<;_x^Z(VM0rnI!VAl4m>%#ON{uY@d@nbd8%evVL>l
z3*u6r=zL*;Ah3HoNQ5(i4)97-rCVpHnY9DbL`Ppxb10yn?b)$<ntdyO|B<na{}O*r
zzou3X;AncuhrlZ%AXY%@CdMoiDFFf(pdiWd+@lmpR@v7F_FUCcuGk+Y>YBG>!6U3>
z`Q4T$dpNG*@OKl5JX46FtLNqJ+3H*90z{(lgof$fOg(1rK(W1<`z^3i^}yDlgTFQ@
zg|>1}&YmTVxrpU>pH<DR$FX-$j&mP|d~XZ&v2d+1r}vcnb68(=n*RIaZQT1Zqeh1O
zwUqNb{Y8aAbkw)W@Hmj0!l~RWcV!SZ>c}%k3B-8H!gjpJWye&2z$k0%25K{V7Sd2)
z5tHag==eh+nA05dAbtOE{NXWeVElIlGUA1Scs|zMDzF>u2sQMuK#B3b0q!fPcmr-a
z6xH(gj~dVQJ*V&J&o?yH4;|An8}aEEK62B3i%-8?5wmUOA^ZjcW2v>6z%-O~?3Il%
z!_0Irgl{xeTZP>WQm?x{{g|<$*J8`Vl87>?Zq*DJo3T)P_U_Cm<jQVWd!K$|>i9BM
zC^CG-vVS2;(CBVze#d6V52q3}diLP5V<2#R%vY;t<zp#2!(yYZ+e8KCBx$*AG^I@8
z#&X+7-PA&TYwT+v=gVk*FLJEbW%e0g0JGiYo4CsT-G@}{Gf$cfZ0{VnuT(Pgn(DNJ
zl}~>m)y>&CP^-7VY>7S;@^FB1<dqDxAeRTxLYqCg+BSEs+~z}cu!dhHEQ!J^&cFY`
zk<#4r9O6A8qov4mDn~;6uW)5W1C7Q+$kAQby8TF(^91z9V%PDBW}jGqP1YGP-J0@a
z414#k>Yp0ru|h46nUc`a`YP+p+tHz{TjAxl=Qw5Zs6j?dpZd&8ups+wGKWIW^K7ZS
zebDU2xXVqe+=;v=dwDiO0ggLxVrkl%d~w@*B{Q!Oc~UnR{a(;y>g~E2I7*VaU%OSR
zi4&Ln@acbB>qXYb+%bT$wvZijN3vGBfkR?=-c&7m8te3mF*IkayQp&>la8|L$nmWR
z`*7joRdO+N;6+op)uRGL#-93xEx%?-Z+qsou>?^wm>BC($Y<Tv7D)@#roh7GRGL+%
zRetUaTDJj;I=apeB_=Bu&2}JI_&vu!1Ze>T@J{Mpi7Hzd6Dj)O$C89R38L5|E6d|6
zwg)WF1T6H28jjo+vFyn%IdhLGuSuSz9gK!#Ny&I|Sa)I>MC~01qn$L`3tA0a+fO8?
zsHzqvV-|B;4YE&N-Ry$m$ZIop9Lb#c;!#VZ(O5BQp&4cVka@`%MDy!fuG0mu#A|ts
z6$~P0N7}X1dml({!Jtzk9Y%Gqqv?LchaZ4FqaJRf+OaKD?{J2_t{U+pBU&1>N5YXO
zU9udk^MmwV<{bjFl7&rdQx816l0t>9qW3Ir0pZ`>CjLo*Jn-7jQjuF<Z0dY`?qLcD
ziNgdI3|HnuXU2QWh6~RpC}yHlbzNt3k?q_ZuGYm9GH0g|X;GfbbLxZgPuX(cs=Cam
zr(6ZGe1z*vi)P8#(GyEIc7b?l!pu-};xKo>$w{9xDl!k;KR@4+*C?~EAL&;)@kWwd
zyXy9V<a(Etr7+hAHiTLLtX1DFfwcK0U*?z$5LFc&6!Ukv03r$_36a*5g>Ri+fwYQ$
zyL-jLi9#*0PdO#29wym<{O*F;#P6z^zf0;Fe(hF|b<wEe*;);~ixqJ<Zz$v5^8X0x
z6{E=6Zl~@7G%3D1bh&W#SQySFz)zD20;FO>-JhA*PN>amXMUzx#+I$LgI@BgVW_9?
zH{W}F%t?~f%%@75wZ>!9#I8UMjmvb#1#YpAfDM&aExpvqN5JYLXcART<14bnPt81%
zKl{PF0{CHe_J&8;>src%H|}wX0eZF=X0^+YPC1&l?qJzaf!20z){vEn08h1cKt&8Y
zeJGFY-dpwbVqft-I~^mE*)|h$*U4ARv_dYax#+A0v4xZjWsHr6n6b+}CH^N9@=y>S
z%kT~)g0LQoAbOrp!}gH3_kjkjHhQu2^NVfCg(C00sVDsg1`x7hRX2{*`1xQ*+Ow0k
z!7|+D`a}ImrhA<Gc3`@T1dN5~+oT2aJ{ix^-$22liyXQ8J7^yUE3jh7t2ggMz6)Oj
z8JUQty_Es<hI}OM??7gt*m%3I81%M+;te-|=2H(UGI80+xh6v=bc#0rd~4%uS9E&t
zva$5j(<-b8?eN3rpC?E>1G%H3H%bL)dh5)J-TbJYlaWGp@Rs*=kpkFdL-OsQxJc)<
zjZWrRZ0X#<K9p}{9z`lUpXT@i;>kW<^9xV}zJ3nyM=OxE$O9QKSHu&9-^~>uzn-Oz
z&+lJ~Mh3+tO=ZRJ#^@|(iL6|rt10)T9(_qJ&YN0c1+pY9T7&h`nAEGmqNzH$S$t@%
zBZ+(kR6?U2xyfj|VGiAdI4z8<lEvp8`}n7qJ%`^uiE+<5Wt@~yy41IY{NTt;CIAIj
zj4TvW&rd`2H<ioAZYrCzlPtrC&M}?b9dD^X<T%t+vCkiwoc2l+L=_)mIc1kYKliHG
zJ{?)<8ZOU5zA643IS@9#walQ|ndZwj4uvluVOOGtsw)C9S+s_}Lrd{SZ=+1NZim+J
za<``zN|Thb`_Ih)U5G_$8oK9VUDE>l-|5A08eIuZO3QsyaFU|AfG*Kv7w20ipUVB#
z4oWXILej%t4_cO%b>^dDYwTI)xB$}X9?0-zD?1&KM1DzMR)sw?<=X5GdJpH|f_dYX
z(7Uk-C17hh&Yu;9J(LZ|1HjlWqI^#VL#ihD9%LPI+48%ZrXQ#`(?+RYoUUMKOl9`N
zq-5W=DjZ7K=04{+-6)%oXlO}=sbS5&S+YEjNw~6IXDV)nG!kIJ;XQm_hR*LdjdvNn
zyzb(8NA0OZ>l+u3=FoA^<th5~%U8udZ#iIp2YW+tcGzF?EZI^5c@NMr6Upn_C+6SK
zzp3NzOnHPrf?|v%??)InD0+6FwGmL~;ew7)bZ?JQFL}1+%9O$A7TI?`9pzbyZ<PvF
zy_X!P>I6Eus`mG7+5s}1x-;`20FI(HbjuX|{A^<nB55HDB#$nwq(`oRAgpJgT&{I>
zZs|<*kpMlP$_-ogtF#12b;O4nSYA^4(25eq#<-4U9&l_kdSK@uZU+)wosF{e(ZLz$
zcmumSzHQw_B&Y$#$>6)Lua76Hr8)#xt9$Xxq+U69W4~?nVRr@7E|9-7;K~_}7!jR*
zBbnUO1CO*qKD)d@y-btVSqR%@HwvYJoTAWfr65on<L-BKbL3ci`H85KCku)b+YA&2
zqg@8Vdo~8>p;<w`0R+ZpC@Rw@9=d9m-ERYPvncc|9m%RGUN9=NxTy)W=<w8s0P*FF
zW@V8&>w^9eX$s8#I)%FD_s!FkS<8U0Q$MHWFpy}3S=RA*6)(E7IHf*y_OP}`Q&X+Y
zz1=G31bmD2bkdBq537@3UQC$zpd*kL;;*GKZ8*i%dG~y3`0jg1@|<Y~DMZuNO-*k5
z9+|N}eMh_v{^ae^kh8)=N1I;~l?~aYen89@pFvoh9Oj6;HLu)g=Hkofcb-?6w%1f7
z;ghjTa|wDThH0QkIiYZanmlVw4Dj1+3r+uzy|<2vvhDgn6;Vn-S_A<Jsi7X}MhOAw
z8enJv>F!cMIs{};y1RyML`g^KkQQ+0?&e&>^S<AA&N^$I^Y>Zn{HHhfT($Gs`?vS4
zF`QT&RM=X7(4nbF!zbE)psso_KbBb6`uE^2VdU%c#dv4}*Rl9YJCom;la}E;PWJd=
zx59#!Vt~H?{aKI)Dfz9D@se0wY#{N?j)LZT>_vFw8T|<eJ%4Gvbx<vD$upnF$>fU|
zIq1$D=*`~;oRyi_>P(1S-S?!D1=qdoai=OB6ZzOCcj`-qI$~BI4Uh}uFKTzu)O@AY
z`DP|&#h}Z+Q4Vd|i~Wu}_kgvs4SC`QjiOKJ8Q}Htb|Hp1#wy^IXXDTLSggNPT~!vp
ztY&>o)%s=JS9OQ_7dcXr&@j~&mk;y9iyeX79{HTpP2iR2G7~X1F}(YB++&gMTgw!1
z`Ajl)lUi+C;q^V~pFOekEx4njPiUxRUGKK8(ZK`nw!T3!rT;xT#ST}M_R-d}<>ky!
zSKr^(zck%r-${sp9xd17YO%b_!IBx}6@{p1$#A7MHkyKc62+ZS-J@|?F%G5qRICWV
z#A3-61d}=5K6h{NPdj`sN<94f;(f~>3(+ZofYxFqhyu1k{0RWE9|aiJ2$NHG>Zcbb
z{X^(<N?nLVs}{;beWb!AvsUpZ=on20wWybdDC1$9RDg+~h)kH7sATbGZ`{?%1u(3K
zN(A}c8r_P@s6K(k``fm@s%v2|Ma6{Mq^tN5s6T>N<b668sVLFI;zG;Rs4Eq+rl;@-
zMV^Vvr||gAxtvIRL<)(rIZenFb?fyB#^58=;Mg-bDa&{tO?zk=T{jTRIXW<_5)O`8
z)rW0QhJ#O)6l7#x0^71hAPLr3rdE|;URpi5qgb`2_$Kh5#envEX2+W#rI<F+o_uO^
z6$jPgaRH+kuGmV)C$Kys%UZ~FSul!!`yj8NNMa*`vvp-6qZEFTw;oi-g&SdZ4aK=D
zJ^xLT#Bt{ivzJOH7Fqi<RW<$FQ$W*SXsO@+XJ+n<E<exe_e4kKhH4NG<ZHEzuAe$F
zxxZkJIN9A?k^vm7kiP`tlb_3>@SRD393cC0Z^Gl$M2FBzR0@~x^x1_9Ei|erZ~H?9
zbnNA)j2^A0?K_`4RThsf4dS8rZA^MvhQ6-b&RN!2-9<(j)VIzGuM=TTVW}X8kN2e<
z>vsD#b_<L5LBpUYSOPWcX7hd@-XVSF2G-msw<oYTTDPL5(j=T81K73lT4(Slp{O>6
zbGDk!>z%hRR>F$J82sW3G+8A;g%+n&sg8<ji1&6&9qWtY-g1qniYk|su>F!|AK${@
z&PuwjBB{c#5?9>z-a<|RMWs^0y|#K%M*tT$OgRhKi*P+z5K0F8L6&Q{BD>t9)RyYC
zR%l>VyD@xt$>bxiT5hXcpVa!O$eREG!>&0<1NYS%v=#vJG$*u`I+7wA{Rg5d<_?ba
zrv`(h1IE_YT4||t#yp9z(Q8U<WXYtzP52t2(V-q|Xx7j2qjpqFPg7rcbtGJ-@Iz;1
zS$?v07`MJ*7B6!x=@+;J-rT$5k8ndzAu4W-$Awu-R^WUv$ip!eBzpMFr0-=XO?tgT
z{vnwk7S(hpW+<i9)PVdxNZQK0$UB#uNnf20q#N@iPIu@4@N-5Z2CHf<9aY7!o($VF
z|5D*m(K#n1d)>z;eic$wq+jlnQ?j|N7XWknd%s0j%tYrt!%_{23Pn<*;gOz0a;C#4
z?$47?UaOBcTs3-K`nQ6rRwjej3=09yO2<Ev?huu=W6>uSH?~s$vnm-*4YAb8nX7JA
zJ09b$@u`beGpuC)9H$SMA@AGz^dB|#C`~#(=B^r;dU~&o^_K7Z;z(?>#(Vwy-H*wT
zx=N4DGq7Zb6#Vckazp@}<_L`p^49x;s|#P_&z)1))+l-ke|i2#a~WH_68vEPfcEnC
z?%gerVz-*a)K|0)VT`^!KERSDT*!cqsgstj?nGpNXgfI^QDk9F8nI&4$f&_)4Wd2g
z!MPOoItv(~0i}v#G+4g#t1#YD?cYGLHD8abOHVB4hlaHMIdmz9YWDf|g`(tr4;DT3
zJDfx=;e5q}$lfClSf>+*0S@kD1+)*2PZ3Ecf5Oa4(yXj9Lpn2ZZoBatVW9Y_0_q3A
z0%S!Rklp$6kg*28nMcyu7d7R8Ro1N<W5-2f+FX>%<?0PQQ3<o>#)y&(39c5+fLBN7
zik4$UOMc0Gwv#V}vrZKD170&81O|-7_!Wgh9e$Z{(JiObkRjViw4V@@MTAEe<waJK
z1bSnINPh^5m6F|g|Ei(0(qGiS#Q15xH+05bKFHFqpwRr|*zI*{Ems$pav&L0Fit~l
zM2YpvmEdQw@>fM!T|dmR=OV0UG1o7HTqSEmp3e6hJT=i-(4U~V`W_2ZhDRN^^sU9=
zMy~FY+63Nvpv$bMAtXW2M}it}-qYK8o-@v)C63f=W5XzwFg@8PHIVsZQH*Je9bx9n
zabP6vcbwhtmnmQtui+$1*Wtu&qJ!TCe6ixz$Zs^^My#P^9p|frS3%f*r}x@QNNO~z
z1)1UB4V~sW?b`(2KZ{8tStIz8b`qpc@3Ohp;l#x**Zm<KU(2FA`Y?Nbp1@b%PZl!%
z!QiY<8j|0?>_`Q|>(K_tnWY?nS$gl9KCcbK2L;NxH8yO&HR>2W?1`I01{~)LYG^#b
z0;TOQc~nx#TjvSF)pMg2e)ou7>dgoZC3RE=eF<uwW9T5!!LOa{%0hXKrgFlGbo|sy
zlZ_A@;%Dn^fDYl0*qm1DnzkCPGwG@U(Blsyvq3=N)>T9J*s5#WC<8q3%dL6kAhp;_
z+X$y9h(+<&RsizylB8@qHJM1~^irXnlP@uEY8!RzBtkZ(kHPLw(zjWsovFgu5%;%6
z4Th(&#T<&Z^cW#<(jMHhz{Gdj=I?Ovu$Jr6C(HeciIEFrk!$AX(`y%hEVL;q<U7Z{
zKO42))8mUxwtZd0?yU0Egx&T*ZF%e24`g#`+3IsnJq;?ZpG}kT@(1@!PVIsE#P%<(
zNdsLE^3%Fjabz8QU#mc{hGqZKkj~<k8yEK3U#X{LK#wA<F#B<o%uze*g%M;(wyY<1
z?Yyj@%S`Uqw-{%NhPNh)@Xf3Al#eeCSv!+rK<Fv<wXAq3%H>3GD_aWEJrl?~MPmVK
zc;e?ib>?}Kh5a)sfU}LD1cDs0Isew0!{Auj3SBJOr+#dF75DtwPXR--FryvY$#uL`
z4uW;(iO@Chd3E2kIN2`7zCErw_*P^1#@<2H++zl@h5D4YSyutoip~AgVj+OL9AS1>
z=TqlOoz1ajBh3N*hEQ3`-PTId2YBMlyPcJ4Nu;CqWj+?~OCo^f;R#|B3mN}pA)J@j
zKnM%bpK)v;IT>m=1FTL6d%j(+U9JQXzdRV$q#$2=Ii+mI_BEe(sP*BoxZ!*V?UhLJ
zW|c}Z6yL(?<kHOhAz>?=y1plR$0=~qhIQ#27_CkxkM)8zTC7f|ldvyprGd&a3H~%N
z!-pH{%65!hfhK}bB7<($I7*mu2~(_Y8<CGmj>f~%L=~^vTK^q3U2P`#cV)IfYnub3
zjJ$8gXA7KeydYf3E#B9=jfY6_p<yx(3Pmw^AkMfuG$9rdd!v5~E1>vu0&J|y5z>d*
z=cG89NlA3ljmJ2+c@BRhmqW&tSnpme#)uznsa6O5vj1_Vu1c%9J!;US+r3_w1f(me
za1o^rib0YwnA>sN=eaE}OEJs3%Dvl+*uTIm{8b*3v%<f>P<p`v(_o<*jV^+Yfz}??
zJIPX6bY-B+!23rfKMn39QWOh549}TGjQwS+nY7n6pqS3$-LMyZhifODY%BJc74B;H
zIigLeo+Q@HZ0Mf)PiJ<J_JNKGR3b*#ae3B^KQ7t1lGbak{F<xfAMe-Zj(G_oSA`(C
z<rjC;!!75&8JS7?axp0;{XJDKgvj5<4L9>$W!qb~|5lp+!OLoeKbQLz*LnGBgcRlU
zx`Ljv-l6oiay8K070}?aa%<+K(r0>~FtYNjS9&cGwV3<eK~RpRZoyOEPJKvu<zMr$
z3P1XD$31dB-?5Ir!jN2gDG1ZlZYb*Rm8MLZ|J*#@WX3V6@8S59wZpC`N^kp+?1*`+
z(?$e-Uhdh1$NXVg`$3<B>kPOP3domzTKuD>R4FJxHbZ>c9=jvd53^{amMP#W(akgw
zY%X{9Xeq(;j38qt`-dgmM2CPgz-_HPB5T+0LF$gcIyfo6db(Onz1qu~?>bm#z96Kc
zA6mF^oJ^g8NkeCrOwbT+#%`)|=EPMAd6LQ7n+vU?TFPo^<K@46rucAr4%R3`Q2fng
zqCW{c&jp!3M0(|<d6$NleDvV!YwLbqzS8nBHFbY+_v^`dELekIU$`TcNkfl`j$v7Q
zSZ?R`kKS?O_&-V5ZE=S6k83e2=sM2=@bZFB@D50CYhSg_RmWrmRf_#>_l>(0Cz{7F
zoELwU^jW@rZ=96HOy+XEuq^!1$+~nUDK_&DOqaVtXZl+ESCO6d_`VE*5>%Z|$Y-0I
zK~dMH$l6V_!*w&<^+Lrf=jxgB)n2|g8_8y_eML{ar)aA3Jp%MMw_4jKdstp>WQ~(u
zEgFWZ+9tVHnoX9N&qZB-HSfEe>D$^*aka0@dvfM6{-QBrQ!o?-nx-**Cq&|Lt(psb
zCMs=-uX(1a$p6hNbbEU!bNu416%<muR-Fa?(HQSA+SqSo%q9IvSrGN<!<f$6lHR(|
zB^g;tQCZ1`V9m}ipIXXoqsh8H91cA^*|fmIS|Wn>bIx4odb?zOVay-6K=`foS(Q>1
z1XZnqBth5%Gv2jXmC>&Wx|BAmB*-NC_G;I*vY3srm$+tP_lmi&jvM>xz9!OD`Q2to
zkH9Us>t3>JIR28*%=6y()LNQ>^Dz1GJa)|yasQb102Xi&-qsl6=yqHBu}bS^J%42T
zU^Zf0<AHFquQnH{upX~Xn^TjRayvr4qv&mFrP{&7=$+s{lisDyWHB~+vc2|RpUGYq
z6W9e7W8qKoPukCK{th5m2$*{!=qpElO55QS+gTZ|vGNi+Y(9%c?O`-ZlL!MSwvZPr
zl1bGKC6q_ysz5w_aLes`__{9$q+*xT$qQv5<jGb}=MkU~FzfUmH}mv-MLMUkY4OeT
zusgO}%@bqUmyNX^@oq=s7Y%OPOiWYW;$}zsoU%>s{fHyvi2X*h_L)awQ`0u{_A#D;
zenqE3rrXBEy2w=7GPfHtBxH)IO~hi^W;ejqzA5j?p2zZwrbv`$C<@xhZjlYBs`4Sb
z!Aq9E-wT^&S`S*TKzrWgsZrNo$$G`AOU3axkJ8et<wLRc`ZIlN3RfE967!PvyV)fT
zp=LOj4v!m#5U#+;?Q3Oe6W|`CVOkB$f=rsH<-|O^v@D9%Z2h)mrisq@$9Fh(u0`Vv
zMb+ERwHP{pl8x?mv(2Y3dNjIaKR9dV3c@<r>;1$G5maQByuLe(UW|U<y>%3%tWw*k
z3xaI%#_#<Y^M5*8F&Y!pKr;c1nFZ2qz(Rp#;VcDnxj4Y3a-#p!z>$%z%wrobC?QF&
zmoh<pbq!n>-F)KvRQD>cEnaSF;J5YcL&2e!MC|+tU-4z&3ZxatLj@YdMIBTtS27{5
zywuX<?NEVc-Ic5~E=TLhg7negupsNt+_18IO)F45+ndKZ{3p#LNkrZon+!EToZ(Ou
zn)arM>jf8T9^CNv#xybO91B4EhG*5`z8}O78q2Qg?~W^tcQL0Q6CzIs7Jk=<p-lOk
zkz=ck*@_p7<m=a4t$bc*$7#gLd^X1U_E$ONwrN79C8-C~kIkIzpb$_HEgi5v`OQSf
zm)~xH$CYInq=fo%-loSu4StI^kgUeO8@r5e%T;tW5lmk8H7+{-h8(s^Vrl=;4k&b(
z>xz(%DJ$$f0`U7e@89KE7)=|M3ZwIqw_{6V96I{jj!Pn-V|VbRE59dnU#??Cw5`=J
zhnwZ<Eq|M(iBGr8m{VYZCp;-S_cRlJ>%X#A<b3Xi(!DNth=6Q9g!JgeL*b$gn2S98
z)no$oVul38B%`HvVukjrG+`EP9O|wnI^9-y^?$@h_Nr<{)USVt6qY6V?(D}~XCl-t
zQLk#C9Hbg;hQ9UHOE1p5(B9MR$-3)>X+Zphe?YDVR<5@7Kk_-|dQ2sp!U_7?!*AGR
z=GjRLKYoS(^7@wlf`XTnoB5tNu;k2n`7r{(k1{kokC7bvTB_Q*S1P(OwTrjyTyR`I
zxl7FSEO|n4n`Qy$hws=$UCZ+YWAFWtbCG<GIX}_u8vc+ui|vcg6IP~mqkR>l!gj_A
zH3q$1*J4A#%4Z{KmuI@kJKe9=Tc#yzEsuR|c0Oy{miA5Q<inA|nO9X09(g6V@GjgI
zc1I>QzbdDj6j!_&;_&nJ7XB4><k)s;bnNezM2-V*nLlcc@M+`Qu+uXRyRIsQi%)B9
zRK(UDd3EGS9-S5>ZXC`Z|E?8{sePg?)Skp|x0(By;N;3}eZWL#eW)^De|OBzATd?1
z%m}N@H^KW=MWX`Y1@c+LV%m9L+k*>1bxO%`nspn(_=IWRwQ+l+?@WH?b$%XFsvSNJ
zVl1I3r`WQet_#5Lute4j4hMSsE0#uUR<pvmr?y}6S9$%Rp0*J1NYCE14$!l{*I5~W
zF)6Pe!0pj$_ctMlo%=6;ssEVQf|0953ln#li^Hh3sx5ijsgCEQ^xrN@`w}nFX3d1_
z;H|)sn5?0Rj)3&CH5&h~x3M53x7CH*913Bd9V-foX7}&5Y+W<_xF9PFeu5tt<eMoG
z>O<ORMN$kLQ?E?dyheV_Ph!oYmSTg#6OUi>;^z(A8&9xUfBy65@dx-_R<?hbOFCYz
zuI;6E5!!?ct)~e+V!^lLJ_2b2??L4@7TqF^Wry~XBr&Wj--hTfV!n-a(V{R>z3tVi
z+*~*-ka_h^TgRe8%PTevw)vudt@`bXKUzk==G>>ZwF&%4uz&I;9_na~7>r}{pQ^iJ
ze4`*0QhHrOs^hS|^Jke2R>7$!Q&QWaNmiEiQ`bOs<_;@p3K;yJCg<k|Tb5`^bds3b
z*PIyXnL>U3k`Re-H`c*=$b=}VX=k!O3yn%TN=<!k)K0TOTai81_Up=iRez)KskT6@
zz;yJ)A0_Y3u%EK60&~JPGr3puEk_r1<5Tg5`sPg~UPYVDd%b+7efAu==k)4H?iNg;
zA&M8jIebllL5RY$T_^pA_d(b7Rs3;=V)sw?>(rB<R&IZUv?p}+YpW*q%_p@@=XRvL
zxQ2YR(k)6PTOy%$lLa4`>_!bvj_!Fc6f11j&d5OHgCiQMH_L5*L^>+**sn-Om;0Tk
zkJ%wOYKK{`ovz&uXZ=y<!Zs1qAs&PKfi&xHy#yRO@$a-0oDJa62<@JSoZjufsJ5NR
zdd7YIJ*VHM!OgnKb@st#3k^+8lei$F#JQA%{3ZIHdC(sf59MG|#L*FT(4JSt!LwCV
zpW%3Hj1aM#7U<VVE#HiBNy7|H|3FS1qL!LaR9KTr^n_@-(}u_-yn@9e!$gtD7}pEa
zU4&)nFnlyoYCl6VP~>TL^^}8^GonoWhaW>3343BjVn+DnBc3{5P7=b=W_2UE_EHn_
zuk2)|qPK>cKmH<q_Id}y{mkd|yl~v*iCNQ{h%ZN?zMD?@LUZ~PHOTo$44fWh^T`eV
z8l-!&n|=&UELHsz4&}(J!&|{a{7gb&<SAMyhs8s;h8Jn4{7aFxJ1g3ojmMSt`IYBZ
z7G8tD`C)%gnsBzqS>Xiw$3l8Fi%Uw6wl}2XYRGg4t;G|j$dPlA-rZ$h-pclA7uLfe
zwl&ed8^Ol}m6F*82Ct~rFCBDi#T)uIE+fh1sJT5q>R(K{jXCb3nn!jyVIyZ(%F2iM
z!VA}Sl@H((->EMyAe@a<O5er&k+(HXJlkqGoEJ`w&K>>|yLHQUdoJ5IZ-{RW9G1D?
z?P}c{dyOQMPBCku+YK*CA6=d(I$r!|mk{Z=t6}U$2gh3^6Dbx#z>d?<RkrUW904Rc
z0{MJ<_8}6qCudR&R}KZrKlIb*3-jqFrHjK~Yc+vm4U>H9t?8oshogK}X9Ln5dtM5g
zZ)EKqWHsODq;6CU?WTp=OqEnAI2&|HdnI$y!M868(tWx9soJuu(^RhWFY$?++3%G*
z+KP$!%K(p=+OgAXTF2+xIwo{$XVcTpm0%m1_HMY-5<X6}wwY`(R0o<4S{z)ZAHq!T
zG+nja*@ze{*VwJk-+r6SyR9sIR!E&@EB{?=KR2pqw#_=m&}J`k&}|a0A-7I?e(^mt
zI)Gf>;tr@Jwjum)De%7qnhlf1Z*42p;3L&bGUq(`g|We)r|!6Ki6apb7={Us$+HJ_
zio-m@sMDHa%>1E`+S?=DVpk{~PFBcZq08IS{s;NS#g)}rHGYo|dXvT0q~R7nziioF
zGH_3MHw2OqA|)l)^6i#c;g{Gu)n67j<HoP&r7K1+cA0a%5309YOw3Ysk1EPawSLG!
z3@b*9(|X3pG_PtkO&pPS-<x--&fC(`pjVcAMol|4wnfj7soobGYOUS0@b#<R%n&+s
z{pS;g|EZAvPnW#}E0E0n^7!;2Alz6z$HbAoy&|Gkb*K@>oBM?A5=z(Txu~Vd(q-*7
zF%~_yB6hyAoWEK7Ue>cat-d#Ktauon2KDnjX-TWs5}yB}*%DwRAJuBpbi7Z7Y*RWP
z>w7XVwa)fAxaE8JrA^Fe|54B-%6@BYVyW)Vc<T_KVTvVe;lEgbty>mz*D1cT-kj&7
zt<GVP^KLQGzpUO%$=1#Lmpu)fAAz&v^H`u(4J4smnP?z;Ij-rT8(-t`Yf!EYO2k0J
zVHhm!0L}z5{%K#;{5+;RD(QN<?OocmjdkzVRpZy`WOt^%nr`?J&pMrpVQ_V3S~_v*
z^mDzeXH+Eqoz^}*l7Hi(rsnO?7#jwk#Vkbo)XT8kVQj<mIS@YlJ-RX09Zhvm<fB{V
zb+o}h?v+sBi@4|^U!EvT7F`Or968SFol`z3_|ZieFMf>ce5ij(Wv^_GW1bsuyp}SN
zd^&8HUe_cpHdw2_`?mbn7hDfZ*m;Sg<7kpum|vJb>Gqk^PfAeolAB>I(q6zHi~8i>
zW6A~YxihIbSua_!^YYH$6_EE8orj`@FO&^z6DFGYu!#_f<M`%FFd;tz0-2dCGY)C0
zL8g*zBOWr6qO3-3<jOM=#vWOpGvrDXb3sLq=OEhbXWC56nuKSTb$L4LNt*gzb)!mk
zosVnw&f**{hjY1(VJ5!iUI*^PVqDT8nyF{&j`rT&Z=?39i&M?&2t>M?g6XCx*@wr8
zJ9U|ShB@XY`^&s)YJ-I?uE}gliFmZ8UiNmW^ZQRvdC4o9=$DtzTH<Zrmbi95Mp!;)
z@{s<T#A~%`>tCZ$tXbmS#Jf4r-J~^9IaQSJNm@3_b41?O)LUF#ns?YMKiK*=%3nkO
zYHTLBskET$_XMZCTAq&9F|xuORQhL6-!T#VG270EMxy--zfkr<qDzRCjabA*qD>~;
zLGjR}VZG#K7oYhVSNX>Ty6*6f*^72#P3u6?_EB9Ue|>wt#PShi(`}zpl%~4SQ94Jd
zUg1OokN!(NF_f22g|fc&?z7G2&7zSJ?Ws;7Z|l#Od7DKSHGRh3F3r@mKl$r^MNX~j
zu*}I>qx&qo%<s~Iw3VR@y5NKs6vO`XDPRVKlHTp!T5u#oA_Ak@?}F+O@<<LcCOWw3
z{If_j9#4e?;M%f`a^hbAg^I_R*az~Hs|ViGGEwM4roAe1L<$XOG`jHQp#^wTz?<`S
zEj^t0Pscy_w{Kcc6d8fX0fU_jQXNi^>7dvffp`G~f+W&K4ZVQOzeAqX=-TR-sRE$R
z_o04AbnyMf&O!8>3+P6`7Tb(p-8!5wQlMQ=|80vw3Le9_8lfoweD@)l3*DwdLKVB?
z1;kps_3Ut+-0Z6h(G!eGQv|>?jcr$c&oU)}OM#9Mf<s&F^=)7$@uH*v-Fd{BjjkSf
z$nek;aq(wV`L}jp2sDvHe+TvrJMuB_53Ti@qD}W7)HswB-&sh(S_^xI?77jFMeezm
zFMzi#6GK2R3tAOcj97AH|J;7jjMw63Er1Ao3cr1+6$&!ycS`2ux+iO~4$3~>h4KAc
z^9L2$lm?NdZGfjdPaH%8*F!bmc=lk;=^Qg^S2S%kQ}#1Pel-rEu@a9wpVgXj=nRUA
zoc|OKYQZ($tk!)7TBb9HqNNoeD)({7(7$IUecvMs&sRSEMcuSZ;SG>WwgAV&Ns4vG
z{u~V{)=d{{vF-O0)<g0|B%ZFpILg2nj)$<u4H>YIx&O;Tbh_|qM22L1`N!@NoDpp^
z21%6jYS)#qxmBaQ*yBGEaz{F$(S?Ekj2EB<T^~p09B6{-%Rtu~(#p~9#hUq7*o&UN
zud$jOLnpvZqSIShC5(m@`D^BuU)M#s8`NQFFUL6o=yr6~@sO!705G<q7t$7qkEY~D
z37d|+NOAMYjPbTw!RJepn{~62@^1H+5o+ZzB4^mvBPE%o4V|oGC7#2NqA}vx09GC*
zcLo#I2CN2?>)Z$UP?5{vQS^FnpFX5SY!GZEE4S4V7+n0u7dkPT^2|@Q9o5vXQA!m;
z5I8@b<=hpVl=TEK9=<m{54{1rZ6nQl8=>e1oAWAbfQ6O6K9&OzDy1TecwUYUr^*Wo
z>IY!SS7t8g8B8d2@E@0E8PW$wR&&#LP5s4Dx7o{K@!-wEMC0N&g{o_mW`v=2n((~O
z+ESM!<W=D4cQqO}(b2{0|I^X8K)XJG-3tN!8eToFk9v%t;*UXDaQ?UXjs$HVrb%M(
z6N}%!LBo#`&|1F1?n0nR1dtLn4=XGN<zv!m>*zuRW^`Yr(=X1_pq+pOI1`L^M_nB-
zEHb=l1Guw^PXx(tWSblOEj)Ai8<@2fjB-k}c=B&?o7ZVs7?gv?x(qZjlE@eWe@i&J
zH4ZT5PJm{86r@;MAHnPaJ4gph(20fC`Q{Tfgy;??q4Zp!n^OV0;fb3Jm8vJ0;m3K#
zCo+Jtl}l_zW8$UIA9K;Y`U)y!GVXz{40*Ctz|b%L$^B+41tIT?evW>r1Kl{^j{3U+
zcFt$;ug<=h!2v=fx0BZGJmBlS>teY%d3m55)Vq!OFo%FXOS$<BL@4Uy6haem6TN_<
zweK#P%j&Kxs6mr_K*NM4Q(<U_lZ}32{Cp@Xh0F0Fy5Q(P6%+R{FjmoDlnF^+6q5%f
z-lEW|%l{=1x+}tS8u0dS!2jJ{nuUBM0Camy|36w4=v4H#O-#_G8wz!~eQyD;=traB
z#uT7i<bNAYgfar+KxW`VAehzbCiuUM^q((oz`<!FT=do=(Gr3B-|7GJr4=YxMfZ?x
z;q%8}^2GmtazNw%PaJ!g*<;KF38Q_!kk0nkHUNnIzpYvFFxP&$mwv9bA8}{b^WLn9
zm|*n%x11>qd)i}3e{B1oApNkNAzKU}!^A5Z!8cL~@c3qV=uh|jPe+p$^mkDLDjbQ>
z5C6j_kI=O?83KT(I_F<;uA&PjV#+C4{+Au!NrZz@)i->C4bUINk^C)ygVw<i@X&OX
zFY>0OlnRWy;E<De02n{Ndpr_dxdd9R+<b%Te_E|@f^Wh!(AuP+7f1Uq6I06EbRxkA
z<2niqDR==nA6~SUxoLLuFVSc}nt-uMgwr$00lpjdkWUG1Pai6PhtS3c*=SWj5(35*
zpju@V1)Esi<)ugCwg2;tl!vdl!Pw!c{R4PFxH+&XnQpA<zY-RF<3WV~r#)yKy7BqJ
zNm49YLH;XCpseg)<zlgN7u-R}gQDWmWfC2rJ&PdR?)%#t=M9_~2fnPA9Qsz`<@4(m
zdP)C&R|pj20XHPYWcm34`ddJF-@4>(w9>!gZ_I<H!UC9zJcPa_2W&!6wADNZ{l>q7
zehY&IUAu(Xr|?35ZQ!o)A_ICt1Ow1lxl>%|F7^_RVBCoBh(|3y(|Ehh`%`S$b$K~u
z+yxJJ$D7-Ju6;oxr}Gyh90yV+oNbrVpWE(^v4;=pPn-h-{eM^Tw=h|iX`@X%L4nl<
zN_G~CZLs((s$|_M)ix?hyM|*%NVDa5ze4rsrzj^oMx*9br)|QaRy8lb4!eD(fcakm
z?}ggs5}gQhx1KNF%f5cD2G{;aeRsrum{>0ous^gS@&f1w{GnKm7VM!7uTkr5uX4Y&
zn&aSbY&z*2`PkW~2cUld4(1EmulyDZO?EGSY%R?kwf6gn`+eJLe(ro_any9aA?Pe)
zgNsbWikckl{K>uV;IK^iso3DLJAsXJH$9%8Vfd5vqYSAJCM#kk+1-?uXDbaoL~()>
zf+7?eHV?(V8$9~BvU?c8Z;G0JMZ%(qZR=h4^)tb<h1TV>r|upc%6Yr%&q<Jeo&CLX
z?=W9|&BtVh`-UgS$KJyEC}Bj1ErV#b*!Kgae}qLN#i7&k2eYlNl;nM|j`<ncr-Kts
zST&LjR=K=yUqZACG|33>emk^Aa3ch&*RiuWVKVD!zf@}c80%6h->%`>=vAqU51V$(
zFdLK===C#=Pdxnd-H<Ajs-eoXtKWI_am-p7L8~Lp-|k!4Gm&j!s`gQ%wPc)zB%;A{
zo4XevZqbdO($SNMB{){?iE@LaB5*&*lSsJNXd$FKT8}jU5y$kfiHxDnfgTaQReFiR
zAc1jP;+=`$t(32fAHjCswP9mcLANedv`IL02HtOd`@;PC)Q`+=Hnba?aqJ(9-i?Py
z{q6O4Y6;=OXI%yj!Q9)*5?5KL1N&Ef9E&m9rv8_cZ%NqIA9W`%m%!=*qCe%;g$dyI
zJ`TXY)LYF=>8!+-596X#^dor~%f>#?xfku)l)z;Wd?CFzpLQ<%)VlUz%hs@LzsZ93
zFxO76L608SylkYAB?+>9fu$@S^VK`dA!V@M%lm>DIAFn;?6=v%%pAYLHH9nQoXJ;j
zyOXnyu_k6g;jL{x0&?fJJHx)PqjbVic<7!`g07FvHIB%^l1l0in`-BL4x`SE8o-|-
z;Vi29MJA3mG1F1CzPc}l+Tv?Judto0&g`yu8G19WD6G^;Lkt^8A}Gi0Y3(zyUDO7h
zvgI_i=$4$8_^Xn}M|<{4_3CI>2jXVg6_<qkb{Hzn`itnBt;ubI4hZ#*Y$jvaSTPJU
zWhi*IRFqqel-~&RLgRvMrQq)NlOdkOWc2B}*P?4!IA6L4?e2)T@Kd9T6D6zq9_TO8
zU_W7j-)YB#-9c~nB=}ZmrM-YN84>MmBJ5WfTExi2Y!j}tmN}JjO?DnTLr!S~gwFAY
zdzDI?tF5oh?@_+Zd_i4Iqz)=|{n%vwlbO(CSf;s-Xb^W>ibYvzpR!?eV?b%K`^xb>
zu|ajbxg5ozQfVsT^!Wt;?g9(Otmd=w!)Of~`nyf$!&U-~+X9sZpUky8_ZBSK4t4+9
z)Jgs2FaMpVo*Csl*d@3cieX$ZSkG#bWqLq>VWuhHR0VaHhSaSMlK4lbmaCVAh)fY=
z3RceSe*Vb>iBGe48zsM5lSOQB?xiH6N$UehJ8}F|kp70W_iq=6nT19Ao$kd%AG}@s
zSxM6OqYSg8T7jIC?Wu?K_2sPpv82b92*>I7&|BlSI>d@XV<9<C$O&V)Z8(p;CDOdC
z>lxz<Y1+rCZx$n+<xO-jS6MYL)&}cBgu_2jm^3VY9eZ>>_$GvwU#$Y`qL_(HQGcrv
z$~U%^!soYZ^NCux5b8#@NjE}IZrAbA;*3stcG}V@uBVd3$k{G+%Qm7?Q(<Oz<A-1@
zwC&CF;6fT|r==tPyE{Blud?rSI(gyjdEStT8Ut|eB3s!f^cG(;8Sx?@u|xKAClrd?
zcJGAPtpw<hW<{Sy67q7*dOk|#axD*7_H4lntBqI5QVsM=jm^3Rr)uP1$+uS!az1!*
z1k3z<aOiSrPl%M6s@tSBkJ0;R@<s<UPYRkk-2MO|A|ajK?Jp#ar%9L^7vQ@vU^%0`
z<iuMEx%<ofLPS|Mlu^xwNA4Q_O)oB(J$5$2BAdAMd+1jM<1(V6&rsc850OsrM$O~A
z&pAcGND3zey(MZL@Q%^d-`th!6VbKT&a>6Q_@9pfS9X!Yu+fzANCjLuPUwtrvt1aP
zD}%Wbo>w{<{0@aMvG1U2Wk)EcxM^MwkXAn!WH)`)&KWjYyD=t}%b`8EoFV<&jWX$>
zCH$F1kaTDsiO$vK)R(i_c}e9%H<3jMoF6h3PU>GaD2?<})#ezrGr03$a>PvmvOQbS
zN1zOgG$3#r9OtLltn1G&I$UxJMb-aaZ;Tnl-xdsBWj&le6>+uNkb)>2c>iTPrc+;f
zjekYg<1IRHm*V-Qb9$BImEHz?Wc&Y3!^lOX9W7ExrN@jd8@<#(nN@Y{71CQ&3)s#!
z#I_<Q-gZ$eh<NT7U|SOS9-SN@oLPrNRAei1<sMh%JkMu<FDpTF@T?dUgV_mn<S4IC
z{fS+qJlew5C>MrOdnz4yHRJIQRgcS(y!2%rH#k48)i`Oru4#{U*+I$Y*lU=~j0(r5
z;`vZSMsmL~_%io4HiFOiu-P?i13#F6sMCqdBg~rNHaL7|Y!yiI_t<PelaCTAMA+sC
zh;%dEUWnvfU&q5e%~zOOY8l%U6LRE;ICD6ZFWi*km{wTWdyoc+u-)WfH|b9mDc^nv
zC)rtxVqN^U8X<J4qYK1l%LeZJt8Y0Vl^(fQf^AB;LG+%NZeT0aOc2SwI*rg?9MH*i
zRyaF)%=m|3L%^r|i<yr5uV<qpqfjQ7Nzb6h$-@5cGcMc}4Q=cr$MWN9ce~=Od&A65
zC5;ynhiNsxr4BbNE9+)$bw|ILlWkhhtd-pwTDKgo`k-V<Fd~DXDoC4e1fuims}GQ*
zrRS(nV6_+?@?Zw2LKJSZ5SsNV7AYf16-liVMos$OnunUvd%<#Cp2|MuTy8*}6^P4R
zh6UTMXT2{fYw_}Xp$f@PD1*JoHvx2!=88V|s8pKaD75+b%yKG`vumZQbtsYzkyG)R
z<Wc-H6P>K(nGjbO%QiX5a*8!Sb2-LDEdbbjqpn$$YvS5{yYw){$xU}FKU#>q^!!tS
z%Kg%9%e;O!L-iNu=hs&OtcR>wisehaeJp;l%x*)t<1*ab%WJ>mFbpa@{9d7KERhz4
z%tub+*}pj`Ha{qlAPwIAc#8HhXeDp}wywam9)gysoxAUiNRZ~vu?6d;$#rT}T0@1X
zi&9~B4nCz^<?a>vDYcK*lQC85B*=ftQuN?eeZYZnuy{Oke6%_BP%>Q?nW8gur?m20
zrxxL|&L`SF;>;pFrZ?g1=>2tzj(P%JLgd>Q%jTVBoXb2uibO<f*wvArAKxKzq&e>0
z7BXR9{*pVRA$FqK@=Tny9yvAa>aj=FS;;hVa{oqEGu;kH{W+l2Rzy1#c@Hz7!L14&
zA@aAC3eAvdzhYT8a9rpcQ`O$+bn)~_DM;~IP#>463Kh{!MU43N7uIai3kjB0$`__r
zL6yp4uvc*mUrQ%G-PCWjW6NzIQ_?Bxd)=M2|Hmzw16b4!(gY=YsU%V3%?JzNhe>{M
zzF~JGFf0CjaR*W~bNnF(oHE<J6%K`K_B5=#F4A?`=ZI=<D|lo6N&zwl+_T-?UEl&T
z>aRCZkp>QKU%JT-atYdVSkB8BSGy=KbcV3NTd)&f#-k<a9U&N+E5t$YW@rWU(E02v
zLu#J7;aX-tPx?pY<Sh7x+EG6%#FJ9w;Gecs7@;i_QZ94Np6cmQ_+5a7BMlalZhXh*
z;MZx6i|Xj1jBvnz;12XrJ5O(io6&q*(zuzx0zCC|yOqy`7HNMXfT(2h?RydloX)^O
zYuNJ}jVl8T<#RiqaI{H*NP_<m*)@O1ZlGojkv~7uEUL=Unp0z$c3PUGQ5>4i^%VX9
zTLR;Rga|UBe|{1I6t;i8v(}Bm#>TjMh%}u||5Lv{{_bPjE3r?^h9qbmi``5)zPS5q
zJ$w5PgF9<tA&kgr*`anrXJ+A%KRvZ!8=Pw?Bj1ZYOX9O$zjxJsroJrK;<D$Xz1N1m
zQJ?ZRFADKa4u@IluSeVKbENizIP=#s<BGf3vve<>mL9*<SFL{|8Iw0t;S$lRch=6o
zwZC$DAXe=^C0TN1-O`#URO39qnkt)?59JG_>jqLl`~DBD1cHjnZ-MpZDA_PC7~v?T
z8LJ0u*2xPR#)YO>N(&od&blS8G_$JOuvW**U?QaWy0?ddCTr&T59@AC!v?Fu7p0Xe
z1LX;Nup^t<7@(4d`a`+wf7h1X?4dnX_CUXTl@vMbmlU|;<2jIJZJHwHMn>G0l9$L{
z8~&@$c5$y1DrOz!$MU(el0=sBe4b&%QuRBcycxw7Ze|~gsz(#?p$8z>oqi@5;mvtR
zzH94nvLhAT<i4;ANmcuxBQ=#rI(0BASCgdW9BM`=0bGF{Wy6F$Vbr;ffa3}YwWoEx
zUSS&@17}KWLH0P28TFaZ^}B|mp8E)&vjU=SLq%t)U#Xo=X`!%3dp2!S-s`~pt<kUq
z=tGca{2Wu^hpgX3VKbVkgAuELNRVRZ{>mw#XlA)k0?e|_@dx8Zq55NDNJH>$H<+w4
zxn7s0ct6R*j3LxfVuj~!$E~s<%trAeJuE-A#`PwU5&Pa1ZxJ?-)qZaNBWQGgEOR_i
zUnNK5sW$bvF9Wa7`r4D<b%(SZX;ClUo16}osK5Nn8K>RGX<?DruN!qo7@wX&5BrBE
zXVl}U;uk?hJYhE5Gfua<kJ8b%2bGf7gd=ta`qD$s=>-uG-_E{><t)_-sF<i$FK(wA
zY&)xv&0%fFGBp|k{1c)Nb5)jYexin%*q3Rs0k+M{N^f*F<{HhJf-Di0(jzehUn=OG
z>Xg2dO}Up$MFAabvj1aKAUkVnC|_q;1|Of}j(c{tu`<vwD!RF=-o>gA?+8dYBbvj$
z(gP0j5StIbIUWm43^_=|V8j(P+Sb~AK9irguT}oBn;L?Ok~jkRGg4CR4hNp~$;gs1
z&WXqbXFAsr-k0}`He};nf7lgME9z6f>_h#SfO;;$!yUGvby$PT+1zTW1aR)|;;L_S
zw3M~Q#UBpu@mlnW2p*)!9zW?Gp)t?4sTpF{E+yee2^(7Wd2k*VZbpcT$S;-Z4ewN~
zwUVQn^}T;yNAn`zd#cDkQQqwr5ewP;Y@YVMlu5(OYb~M6X31nB)+bHTe#&$R<TWfX
z`i`z?-^mA#eB%g}w99o2b|1F!(hUDw5A|MDnh%=jRNjIAf|XwDgB!%*qU^I0b*h}1
ze7dQ<@9+4N-Z1&>4iOXpw6X57%~BN0!DnEjULxP$*r-3i-M~swsHeTLQBU4W0Ke~1
zf^!oQHyHwQx8?8svTAGXa!cc9ikL<plFal2T`A>_lvM_2OIEHu2reVBr+_PO+JOv&
zl|8<U5F+>1ON}AjO=fX^c6m|D+oaJ^7cM5tPFFdc!KeFBVXDE80Nw}v@Lu`on;hw`
zXyx*w$s;=)W@C2S>F)Sgy|BKrW7k(CI(@Oi!hRm!RJgqqTgFdZ4G9B@TR-P5{rbES
zgXx5SQLODZ@}YmNwxlqfn+~ZTT-}|<e(tDSI{2!S%s5q8B>$@22-s46GNjtW*GrGj
zQsj;MKTDRchg_eN@@wjXD~;Px%<fCcCNXx^LM6{}pCw67#Ohg1z3|X4(MPPEvdr*b
zIlTxp|Ik*Tjd-npz0l18v7!%%ZCS?Ta^aVUMyhnH=YAfTg{8@(fes&^E-j*HWHq33
zzv-t;&_+&q{<@11Ju4eapVO9L%`4j>96EB_q17oOUQ$U2X|uv7&b(qLg$|vLs~+tw
z#a&!_!g9LLjAkd(y?AHM4u$fa+dxnfsIMnT^ml$vD0yo6K|C%}W8HsqSRlH6_q^#4
zTM#(vHVf_8^{)598KaQ@@mD?}{b@}E6**<d_SHU~6<=WyUMPj_{3jG^i;q7YLgXU%
z{n}a4Mq@<IZ~wzJWuc+zdSB7R%O$(+)A+<S8s@mg#VshyAyC*64Bby}#EA)r(}~yu
z?v28>#>9|^6t8S^(kQ-gV+=+?9IY(iGj}UJCUITIJea;+MXONu%NACBrAw3MYMPWX
zQutAgJ?^}g^o%f#?!NX<gW}~^g)5(P5*3mfo?f9QixiP{Pc5L)!W~XObBjOk&Aevc
z+9y>KFtw?^&kA8*&WT#7-fh&tN&IG!idFaW3oDL=TPlb^oP(%|w4$a6dofW@Tmbv(
z7860bqYgxY;d1#}aUeiDsC=;F$ad|1mGhg4FW(QX$9FfA!G}A0Hd)=Z&w9UXiY6a<
z9^wLRuPxFpc4NpN0FynoLtl!vQnzEUG2;MeB;!dU(f9vi0i^CxvOLu?#Nn>$sMKhI
ze_F~i!+kjvn>rPlzPvC{mVKtQP`zc-nyVJW=mmM9QC1aaE>*I<HcJBhl*X(&=S+xT
zKU?fd8i7l@J#dcj4_``L`_C*Ed}Y7eEp?ws@jINv-jmZ3Q~Qy=y0)#HYDJDBU0>W7
zd%|}Vm5Ukz*64%-Ir06{=(QfCBumhrE}LITGvPfA`toY?_<{>Ib6HTGGq3X(?LI5v
z2(x7qlP*siK^py58H$*rA~u3c?lon7oT*p(?kEcPa}`LFLgeL*3h+R4c=>j{6Pm+c
z@!$3jL*1u%o+Y^+B<yd`%<ZivZS8J59YRzz<gCs3R(j_%BRncVqt7DjfkRgdkT7}N
zm${=-Fc4cQ#<cV$Q5h|9FUB2K`=W;%g~COK(afD{%0^SxDc84oW`;9Z;QcBpR*Co+
z|H>_f(7o8Xbs!;&@iANB*xuxzFB8#B6~sAJz(YP_F006@mgHDBcW5Lv!bPxLyAR(#
zKdUZ|dbZX*;!S7e(@2yeJ;r7<6uulO|9A6zHn(46J9PSkE32!R<$KOk$xO`{b6u(O
z&n+_7+I<QkOFhAxYZQe^GkF#fZ;HPtcr{*dsTS+Fl4uXQ$}L_wPx~Z98)_J`JyZ1i
zT%R}e?SUSuMs-a#7MPw4xsWr<hM`4pUlx_=#=1GQ6|~+=Mb;ak4~;7Z_y(0J6Rvhn
zzVSIY3JjWk1M>0CO>@m+y5z-iS(<5lFI}E#_pEzOKkzy4>OVVUmb+9cP8IUH=dTFM
zk}e-C;w=c$I%d!?gul=NaTWp!?W7zJeywkcS@qosJE{D}v1CzQ-f9iHQklQmkNQtM
zsdMUes9gyTfusMq_zcObE>m0*Zd$W8A=25Dn|s~)d@!2M+#)*lJ8;>GdR)C-`5G}9
zgsv|A{Rh{*PsCrk8b8lIffsAb%E{OKq$l*Wq>E!4VO@UZ&sSuufN6k<;Atq6?-NjP
zF-*dqi9*()<EVvnra0qPg8VPPM$nc+n^H_Udt5bSa_t2JIUuMcZp$uDt2*4aT-45+
z*?j2Ht;aMs`h%7c=#QM}#mXD~aUbodnwLyHdYM&7<odeQtgobu^{na4#p;qlU%2L(
zHm%&DMM1&Nm&Ik5$r^0YhQ~h|<cgdQPLXSe8*529kg(bK!!+#=^$fV|%fY!}%;5Lx
zM1!8P>dyM%jU`zV+ODHE*+yFO>pXqzlmS!vV`?bfBH#Rxe~Kb+g`MFAuHYqBuk;m{
z?F^M)&fG^_tW14({ow)IaJ@8r?;P*xr`g$auPC%BCnb3>hqXbfBBVut%M)(?I;$%K
z%7?=c^6#F|PAsO<0@2N$^e}<TMx&`ZF0BsF4bMLhRXS7pGoqUk4p%z4h|xIbS^a$r
zRm23tA?>jRVQXylSj%Or(Xuk_A~OZe%}=^tSaSzTs#rPT^3G&Py0Um{^bIKisXf!U
zC{!be!U{*M>2k6dJX7_u=L7EP5bF1hdkRInr$`#ugB$nslSD;?S^78JxL$_NOMchQ
zk(TSNI}PO%<G)C6TpgRW4X5Dy96=ZQToPrs^_q+V#VFI_B;1W3B{l+O5EPbr<1P__
zLD}bJ9!sKcfob5<bQGzVA<oEximrB~;nc#dlFJdZ*y>naorMMGxGIeDRH-$p>#=oK
z@{9H&ZX}}uEg5j}`o}j0lLg$0(ifKAkbx7kxy>|$iGJ!33+^rv8NZ{@-fYP$;LU~`
z<SNcGNFX*88>y1fPcb5HbvhZH1{R~w`*u3|2E}5hWPTecX)*^oMKB2MZE~Qvny|xX
zZWbO67GAodIr&!+yfT}E{Qnp8-;=?`g!k(P&LkgYnjJp_UMbIgM6>;rBqXhtD8VY>
zB?wWM8nN2a-Yy5X?;~6-eEFM<f*gp@)6RU8Mkh$#xnsi`Tu8-R_t`3ggK4#)N-4sg
zw@-0p|Lk=>>d<eBp6A4uik_Zmc`hi+cpDFlQ~QqC@+P=nkEX7)%7zSjP_3uv06#iJ
zpS;T-%gFHD4i+OD97%!Bom8(4j<A{!;S=rP0+uN^LS@`A`A0O953$2YZm!HR?+Epk
zgzjYjm^JN-Eqw`w7HZ95M8Eiv41_PLv3GsZ{8d5w9(Xf<{`6(nyB$6Tas1$+gote0
zZ8%x=r)RP68o-#pP`!g_zM=hywtkiF^?c|b?hw7t88nkgXu(YSt{V-Q6a|wHym?I;
zaMd&wdL|=v3@10wsY(3;wQ{<C)+rZ)0h?nqC-z4w-$XGg0JGw2^*heotV;<)#^sAD
z$|d0MVl5qz0XFwxD3~gl7#z~+y^K=^BXW!g&gjaB!s=lL4EGN)JeCajri@@Zt-WWq
zH`-<djP4u)#{M1f4ir_m;!S?jdi=!?V|_gRUZ>NUpspikxEY&Q0^dzCYY*(M;W=2e
z4LYVkX2<vIRk+!(imn}PxEZqH)4iLQ(9+T%`2YOUt<Har(Zp{7(@#xZZ|N!{HaNJ?
z5Avb4G3AylXo7H6{uD(Y2_9H(twWWij2a|4c*yCFDnzd@m*?iP`bcMqn)bZ}6Ycnk
z4Xl>*mk|H`gWP+lGY|<Gvh~yB>8zX^z`E!lLEhe`<M@ac{nzwgdnzrN1s~#g$Vf=-
zo#iGYJUiWjfB}y!pI)Dk@L|<N6N7dO@R?f(yyZ*{GKHM0hJ)bZEZElOroZmCEJh!l
zpz|y9KYRma_6nbhwI*YVE4M~~hj&=Y56Ole-P5#=ltb7&5Ff%t6W-wiAU4~l-`(sO
z<7Hh>*I+zO!6=EoX~7|%BhY!OzBI@r?={8nhV=)0x;X7$5gmPp-m&c7e6S`j#bi=A
zC&4|j35788yDw{kt{JY*=j>PcuRVo(=ZLB0MwYj!y#y|g+s>!??(rC0d&_4lf1_e4
z@8;Hs6AK$#f-1)~#Os87H}57QzKYurN;%8v>HMr0RdKjy$gIcrv%U6BV2RdmrRsJC
zu)7w_F^unO5WTNqJnzwm1w$)MI%YS8<gdGTm=J%Cb3<DlCI$r=tjT!Z>S5I+Yq~d+
z`Cbv3l*3+T65Tzp2Di|TSW#K-zY~6z%5l-u#n-wqJs&95sWQ##o`K8{uG(56h#&FT
z=uzgfCBE)veb;YD_2KoG=kw+liroSfqXc*B>}O=UA}QY8UyHHAxt0SBHRguwulC*I
z+L%P*3i{_n4;;?6{f~Q%?!RrmI{qY+9wew{5Mx@v;F|(3qvn66@Q?dH>khv+peEl8
z<Mcz+(@7d0K}N8b7YXPg5J_5vYb|9aYzsc3S3m;)IhK}3^C~~s>#MI~X!<T%m+N}H
z9asH%;#roK4Scg6LmnCq{pJ!pExAt(UfDnDi@nt^uZAd!D=d(=BYiwSUe%R%>=k*g
zSJ(|cV-J;1d1eceH7^E-K4^^aXBvWDiXR0da%AVm#BXiPxjh_Tv|CxRpADF&-ha(r
zDRQzZMjeC1QfK<gW3%<p9ay)fmq-I&7WgL0URMDyzdhi_qoX#KF@+$P5k1pGs4w+u
zbSTEP)W(9z;!bzL1V~R(orzA~xf_n!Z-V*(?~7Q<0T)C;2%7mG>Abv_8x;jvxx6bb
zN|5`Oz8tF={-q?fuS>c*TWikrPeEv9NuljFnMhTNU1cu;AU9(L*%P?QL{>C!J0#qU
znAQm!&BEA3U>%YR1ri2JTZ-pg_TNl+wui7maH`3)Fh4q_9b8AVb-(}U`ND+7S3Vv6
z`!f`sCVzreEt3<z03srrOS~h3v!eARq}lY%rCvPh7GA_1MF)n0?oXm=1Txh>D`-KY
z@7b<N3^nJnpZHSjk>YkAuJ7vl9C=mXIfX3CSChTO^k#*Z7x7-~90`orsFAWa^6~v*
z?8_3=NHwNDWi2*NzBg_O)R4VO%@znkj2+r~v1db-I3>lD(dMBYD0D#&AJPgoeLibm
zKK8j_W6t{nCISsdo0Q`8AZN>CWk_H>GBNMW)W3cngdAs%0Pz}x>mh#7(tYW};Dm?7
zzNErqn=&XN)L^Z$!gRgg9aHh7$o28EY`D-lRUA9sSl?Vmr$bw}cY$*&1^3l3i-7xD
zKw2Y+C%&Di348gLa-F+l?<<qnGfJJlc>cPrB-k6BKH_r2N}aZG-;)_;)z*U5t3RU1
zJbFbQ+RjS%Q(aq^aI<315fN`-Q%rkf(QHaPfVL@(Apu6dReq;<SZ-8ZruH!ds~x>H
zVTMZIfoE5fp|th0G8E-~JwrKvN~~6YO;vYmIpHMXc{3&4u04Fhdhk56j2Tv$;deby
zM!yz$AQIxh+u>vtF!QjT<*6@-0)%x`lF$-A<si*AV|OypVbvn=DoXTAWv@rOUx{rO
zExO@m{@%0>%lS}z*qnpejp^b8rnM?;(m|x~U5n`qbq;*?Vz_D1l>s`G9Z!sG;@|2F
zw4YwisJrOr{oU%8=(pLA*hl!EKPA<?V(L7Xfr)^$PJn8=ZiiFZPRhzZIlkf4pgUrE
zNYf03j@D!G^l%>qXAVsB6kB+JkumEYtjvqzO{{S2L;#Rvr-6o_<#>6}Bett|X(=Xx
z88hEIZ=8!lAk>PTmi_GvqB?4Ysq}4q3^|XFIBAreG&<aJAJ<C7p*9eqD3-Z?HH!r0
zXGF-ZliZOB)B-AejmM@C+@34dtX4&<Y?QZkyRE{_)=!4W;VO{x04Ik&Nd#4j-(t~?
zmre}CBTZ%Tc^Gz2ORuTe=TH-~h>WR<=$^kW?C~jw{|9?-8CGQ!t_=!;f~Yh|!=_uM
zy9ES9x?4&>8blhUyQP)x?v_-MmXek(>8@GtM$b9(&ADd&%>0?}To*r(?S9`C&wA><
zpJ%0TnLmL*euVYHQgQ2P+@9d<;(<1o6PeqQlxj`_&Ofpk$II>f1=E;}6q)RmvL%45
zGK;#+CO|q$?7!1d^_sX_LCRy~xJ+?mj+#G(Qyc5Zkqf8<v=nnUstW^YNH#V&3X_Qf
z0ck~S=reY4lx9(Z0lG_dno($9<kDXP#mnsH(m$IDvKbB>Z-cO408~{b3=mIDVf94S
zr_k@Gn1=1+g?J?^y;xhPD?wbG(*@a@O~;P7L}2?B|FDld3Nlu*wR;R#5N2Qn5Vlh1
zG^dImxRwD=Ob4aM%&sb0yF$6{4RW^yNTGBsTAQqTqPt|Xau$vIwKpi?^$f{(U0Doa
zbw9F;w+B2I666-`Kqev|LF#<JPh4jLJ7*ZLdhvcM(QZyUMI1A^26A_n$G2)as0SGb
z^Nu1_fW7tlN&!C>Qjll{X80H<DDs?@*VCR3bv{}Q{OYT1z!R-guYDKSb0ItIUc;|9
zxqLCKsewK?lR<QkO!9($0+X8;&|u?wP&8i*GZ5l}bTC4&j!4-Lxd<#E(tjR6^D9LM
zzn-5Y#@!)D!2A}~Op5J@JT`)k5;`LAGpei4L=b$upP=a?4xA-dF868$rkrrHG#V(!
zr4KwaV%eGBn#^M46j6;2$BK28Jbzc}Xo!jJv|5NU85w;d5G^A%BSqaj1aqkNyr|;^
z$7^V13OZJET3}(DmwG3pZnd}z;@YLeWu_C!DfZYee)n!<XiDzoL+YF54g!rZ*T3w?
z_J(M>M+w2^QYLs_mqMr%XZY!xCh!2G-UPUjn+50Latoo)nh)`tL@b;CD^=4*dp-ug
z$%jGc_h$&%@Rz<{?i-1h{X%U^2t|ql+3p|ik^kK?JVL<iEm$dPUs`1hQ0wd|r~5Kx
z^okaTL6^K$^>ygd;|G(39tW|w;MFh^w|y|LwEkK{XeD=IfH#!0ctVW&QM?tSH=adX
zRi(Lo5M&u;2@~46f$O^LVFTe$e3hFc@rDz+SJ_oyA&zK(e`|ex{`1TcfZMS_nKV{B
z`-{SPL7jtz_F}j?y@uZtovfz-Sq6V0^7^4z_KyVMQ%$}PFw<>$g~t2fb@cC8tr^|-
zvE9JMEO+j$2{oK>%T^_bG@M{a%_M=AIs$00rAmS_e`zFGta@CfQ5_*Yl~w@h;b?CX
zz+BkFwuHQ+o<DXb`t>Dd!im1@A|9&;{o0fcu%0Zxe<Y@=g*0=`wOiA>AY(mj^wd+}
zhIXrI!=2g8c%Y;p#z#ErBSYxmt1PW-Uh_K>eZ1P&1{wRG?6xvxo8?-T!(c$1#hjfl
z!P$DOX@?PHEK?u*4DMH#L%zxrw`b%MWreDxZli>j+ReCtn|4dOrx}2hy^J*$2<`%Z
z?gYg%20gFp`K&$xd)Jf{XD#;g)C`A3;pK5d<{3UP%SH8EB4&Lrsc?G+S6B^p4`RP2
z@ZHs%9>#f+pF0Zj>ZR5j2-<3$RnOyCgG^Eg*G(r1LBKYV=y4dQ0>WPy(Zzxy)7=(y
za|FmuuYC?aQ&HTEe}hOLXpC9BAedUHR3X766WtJC#G)gzl|2$6pq7i(!Dn;S!4u-P
zqzif5ox`ez01`s^mWcdda<p@8!L-bD*n>HD^00|9QoY&X$sMKUG+*N9D9&WIT;&DV
z-&+=nyVhcM%v97Zf(Z+30ufjFYjx_k7H<Kl!I0<iC01@hy81)q{u56foSv&=1U0as
z!ide_>}@g*CExW^e+cM*3D+$3j?A1dBwex|xTVwX;RU^1`Dks>XC3^qN*CEZOyid~
ziT{(0_?Cg=!9R5BOf6ze!18gy?omypoUha(b9=5M-VvJarigPD`iVR2nV3-^+vBoY
z_*ui2g9cBiQQ%?hhN|%74D&&Soypm_ceL5hVio~o1#pD%91G)rq^locsiWPO`M+i>
zm@kp;muc4+%aLHl;gNY_cgIQz0GRJJS4pwb=d3_|!A!-{a58?>Iz{P@!c3!Mck#fb
z->8Nf`I6zl2Q@NaToGS4+lLG+Mo(8#kt?U<$Qdq)`}L#498#qZt{~s9knZ*bhf#^!
zjKMT#mgtD+i<aT3Ua!ZqEf|Rn%`$oFb%iagw7)tz;5l-lVMk{As_v5cLGBTgpw<%{
zqVWE~OJ0?C_l@^QCH@qi+8lj1U=lcCbV|{1@wXph^WWDmsZ7XPv45jhEIBXrlk>;;
zE~-<iM)7REW+`>5IS+#UvD^Du&ZhapUMy10N7U^F7Lz5N-I+YI=<Z<)bG~wKEkst>
z&)@y>Hk?qh8}$UGOlN}GzR;<JaQkK59Rlx|p;8H?@KgPCUH<?*%<sVY`Y_Ho_A=iN
zb&>HAg39gt1<TKX0hc@??u;-!=NWBLY-({-F)Y)reoe1k=Z1^_Lazhlt=<#YVc!d$
z#(C1bUy8NZZDTWr3EEs#1S}UHCn$wZlH|h9dQ=#<`Y1Oy5D=^bmMjzOe+QHDMbaC!
zg$r#nt%nJij&Z9V8y1-7Mzm0nrYb&6VG!~wj!o+rD<EzCl`(99qHn77X13W^J|6wc
za`Zk4YmRAH=EoVc<9NKRl@e0P%}^PX-Y7Je3VfLp*{OPO1&g%SbxNg|88#ODBNu8H
zp#AAZ{jLhSS;Mi^Y+}j$pP=x$0+nORz10wQva7spdgA(00_(oxnUz>W9yzY@>f4>=
zRyy+Sz!R~AG9rF2j^n@O;ht!+JNh!zR$+%<&q{61+M+-lyMFZeJViy;F}m?3VvS}o
zCR%I_kJaAK_39s!16hhbI0MX+qO%!zV$X_9*7+-O4iulewU;xiKXEOh#GS$q5vS@w
zzh#$wHdy0#ZunfQfBpd=tIZ2?=-S%oc<gXpV4S{6G(461>C|X{d~d3rBz$lqRpQ5C
z@T%(4EFG@yZP)&yFW1bQe;&0D^N2?PhK*j*7@=f_(`pTM{^hIvGCC3kG=;L9ou46^
zVPmYFDf{12KdT?1n!GTCj8L?&-6OcSfe22N$Q)Gx(-T0J6LoAKVtzY!NO3l`yFJ?{
zz5uvVbuV0!6*?R6WHpL2T3}8xY7CzlwHjb?p{{XbEaL$j`(qhR3|A`En)L1_b(L7Z
z9TUd}d^FOB&ST%kS`?Lkpb|%I8WE?GWSI*1n9G=tb^HL3C*|=8jrLg}p{U_w5sRE!
zD$}2pH&c4^sCXuzE5q|5`cE35u%vgkuX|<=7kdU>)NnQ`degkUDyFZs6CeziyEMHo
zzha}p)H79{8eaeAXQWBspVUB}4<c2wUHR=wrDoS=i~R~%VjE52cBku<6+!7~s@wfj
zyY#B0sUKeZYSMqc#tD<y77i?3f9^2W;afn98%_4a^my#y{B*%taISVK0ZcSFw{m-H
z+}(+q#_|fCSJmy24ug<4F@?c;9X7+^xz-OQPr}&MyrI!F5%p>xhI<|aTJp?oWRq9^
z!DX}*k;bPbnTBXJ_rC@RtV{Y?s|qYC64o&Cg+>~y(M?pKca{qPZ~;i#CS(#)J&uD%
zHt?+4K>Yjoca}(fTb2D}ttDgJb=@821`d(XE|s0Jj!xc0Cb>SYn#~zyNrS9MANk4Z
z%rcr)8M4?nr`IQ3UWZ6t@yVu%HRts`yFOdP(N>|K5KCPv=oe#6DSMvwo-+~WJStsp
z<~3^Mq9e$;C7xmD<a{F}^tr1`^t*_jrx+WdqO&~tGM23s%iY~jlf!Mu3T^Kdqu2H*
zf42DYD*?dlJnbslFjIK<32$OD1|?>+P+2I%Y$p%T`#M!Yt0gX#X9~0D=~%#7+9!>*
ztB=(SVky5b#4Db>a|c~YP<UK*vh>kX3{xq@?o<`?e*H+nrkgyC6N|>)9#Ox&c9`1o
z?!Ku}*s7!Hw;0?O%eM$<$Dv*J&X>h!93#~v^|i>xl<VZq)fXSD_Su+!G^kwGytpfr
zFMiM{FiZc%6*-)f!1rw|Ve|2nCeC-2J9fnrQcVCJmm3-l*ZWm8!&)HjCY4S`Hq>xR
zA2%#BP~We_q%~Gk83H9}qChrqITb}6(auEU>cdKZM$4xj4HF*EWJdWHxi1y#wJt*6
z0(Q#7gr~+o?WM9TtvTxMF@pTln_Q{#!Ke8ya3>NOJKv?kCBAjUO>%He4_Pj$10x<w
zgrs4f`hM{~i)K7ge#f?<TWw`aDs2^8Pam4Ong`W8RZkJgx29WX#L+d4MYEjt4+;9Y
zD>jq;tO_#<f*u|h9`VPKKJpKxT?QK2kK28r#EIL@mN7AIJ$TgEpTXeL8>_CfN?p)(
z2}L()Qd822C~no;pV0k%bo}vr)<o})s(+{E$@l%CMSsM=!k*Jtfv*6Dc_n}DOGm1T
zY&0rV0-F@tFcth$t#SQm_iN1FN&SjoY?}x6HcP!^usM%EbM?1hAv{AHW<g5_Se1YF
zp@aZ4GbQAYXF_fityJq>q^Mv9(}&+e_%Z7SGmYHI3cZvpLlc4<&OXw{wbYxIywIBQ
z^k~BK?X*)9rH}PtT8T(Y)RmD;9tQDI+f>W)23=xK6iX?GJnqlLT*c^6lP==ERCzh3
z<KinUUbQQH8Y>qxT9z$opyaIhh>T&c3l=VGZ1;u)WA_?0RSr@^nQ8IURVL0W)l*xJ
z?}kjClpEDsyOJmmuE^GU+M$x*#yMDEw(cEY@H}5lH=_?++IA%y?0=4HoAC`DwQW+W
z9nbb^s_;(Oah7Pccy(G*>`MiqrcgtkCZwWs{?2kocbOi@=l3#9a0R>`DPi&b*Req!
z8d=fZdf{p-8EhHdUtiEEH+qq&8HeZf%C>XDGGHOphOgNHl)Uy0iy#ig_O+qLz4y~P
zYYLaEXGRq<QdcF-g5S+denkGP$O|;4=cZZvaa~iIBq#*7o>^2SeHr#gg59(?`K8KL
zQ>TCex0(xc9JRaadi61hQrNn?a)pc8a{{*+_q%o`p#!37=`B%$It^ysIGyF<<t4Kr
zo$p(&m8f+`$>m^U-&ESKKKDP-SxPdTAWbv%)RZpUls`*`12}FnUXjMLd&`zopHLsA
zYy61_qk9x~6+SEWRF_6STiqk?kUge<3}b<-sW@1Hpvc|#?vqcWN!+AOXeB?I7y$)d
z!IXX(7vEDw57&6gdNYK)$-dU!<aCDjI_F-@17{rk0+q}rF{b|M%TPff&}os@`ckN=
z!-Ij4CGz(JhI%G*a7#zZaQ_DGMtT~i0h9T9`-W#>Msxp<!kjc~EsqZ@<Z+6xPKV!U
z9VVEp6x32Z;FFAd%)jF*9c0YvsmkrP4vC-`JRtj<b^IaN$@`BBph@V3B+d@<LdYq1
zszi!QKYS%faz+8jc-GSc*Az9%t7XRv3(XPnk@%Et@wkeL+ZIi+n*u#{a!QhpC8gro
zTxlmOFO18yKO7XMq~*AV{p={$mdC|*R>%_dUwu7uYKNnyFt>4HwkFIx1?UbqCug-A
zG`Lm|)qbDI6TigGU6L&q|G3ocL&R0WbI&dCB(|cXDyULvaD|vR%k_MX`i#F!JDkI_
zpYDS%p7{RIZ3n`mvelFmyCo@hh6(KH0{$X~S0!!RTC*Z@hi)s+!!4>elz%qa6x-FC
zeN<j1t++xCGTwF*=gtoX%b<hQ`3COh{TK92D#=U3a3X-TgpX4RttoSBmf1t%P_*cf
zLlSqNYe<bK^rNzD@E6jN)J{`IzjWgSSA`0j$yfNydEf1o+0|e_*Ak2tc6ppbSVtNp
z9C*3vv|P69q;#Zz<^IsA(wE^Mw)3g)S|hrsUVEUE*C5}kc9`<xS#-rOYv$EW?h2-o
zo`mK5>({@PIW{;Xvw!C}%5VHpTxuw*zxN!eJX7DA_lbPkcHDkXg|lbY+0|#*R!aff
zw>lD^6K_8~I?QRAaAzG<JIr07Vt2*skL2_-c2++p(HeM1DNQr`b$I;ZSJ>rU*=h&g
zo5tyQy&7i0>EU$b?**BHQflDgXw|1Yi}t#Nc6&?m4_UAU&YPl)!4+lOq6`JT1EOc%
ztn{+gzX!xsItuc`XlSjl9PvN1z_egpLz#2&+8o9gTIol9E7eN`Ym~Sp?$dz(D#!nJ
zIYa#+sh({XJjm4ruG1Rz*A-yHeSAQe?L4&zy8Ax2jD1#Si7TIIeYdmxRF($@g?2~`
zARrb#ygw!Z+;H$a$oBBwX(;%iKc%+qo>I(gKB)={Z&Y_PPn`g-yg!CA3Sz^4f*>m8
zej6R|x@_<}C$4Gwm#A!cS8JNtP}#CgzR@q7*r0o&OUMR9sYB=oVY6#rm^}D05+-;d
zxt;1QCg9oLO2+sk10MCV+eQN#`5ZDB`CtAsVmJt==T502)Wpk(zs~G#{{m@^p0o;x
zj_bt=9(W$bxdH>p1|l%(me1(l1L+}1NKgYnLh_I8vEWW53V0sE{2dk_ko=znqSz!i
zuDJlLYfqYqrdXLJja(R*w=kl&90)K=FWW~z5dKjdcs(FdK;UOMK@)b=DjQ^s{%IEz
z{y0U*Ctc@e0UD}R0Q~-Ex7^h_$hcwKD`qQ4eR&4tKFATyA^Z}KaE&K|#V?xPuHMDz
zc|%#zdmEr!wj2-OO9CJ{Y&awb{tNSzWnv^wPxj}^DPh0{wx@pp=0)y}114z?F`LJs
zP5kfw6PWuS=%ua@9$0fdU6pD$p<{ToCSPVrrZ=nzjavT|WE`Yg@xc78=1qn}GSxXB
zWG&rESFcW%F~|6?INV<-qG41C(0hIeUILI}!9o)b=!P)aqyIk)<yl<aU7+wb!D3F#
zTse<{9Eo$_M2=8rxx1U343tC#DjhyrEog~+ljPW<3@6xQ;)ntaC+t|VL?97p{rdkJ
zY6#977g!(H^I4q@`nYLole8;<wCr*)!#CjXX#a0e&Rdi{Q@6pG{^q3`ECf2s8#*?J
z(+npBo3r6Z?cZpbK}vD5tv8zhV1k6tdmf0FS@OgW!AX(-mj5rd=v<Ve?iByg{Y;5z
zvU{u&UJwMwIKgO<!DzK=C)g?J<K)E5>%0M#VnvZq9Fov~%bx@!q3LqF_z(ymR)xIh
zO~w*Bl>bIs4KXFqxROk$IsppG%!TovDD=PO|No(|1gM)(A~>T=;R+6={ModClJ%Un
z?7i@k7zr@%H@BgqQ2N9hLL<9pKhst2;(c0s19X=Y_M-+k#UBG4p@)Vmwrh|Wg6ltT
zbm$gDvwuUH(**wN1SKd<w}+j;vnHVfcZo-Siwm662Go#gNWRGfP(QN7X&sb=CvsT8
zz{MXTA-*zj4!<iT@&0M%Sv*J}Zx9M@iZNQVj$FR~RRQk5At5Gutr^x$N0`v*edd1(
zuCpERv$ur?B8&(C0%ru&+85-4f~2m;o9%i=adlT1uJApFiXVqN?Z(b0)1}H_U%^`k
zMCaR}p|rAq<G%IV{FnJ1@7h}KWNs%cw&v@eka_Mq(1`~X4cL!$n2ucv8Qb=jMa<01
zypsBspd92aNb8_Scx@mV8W~|a3KB*)f1E!P+mwdJLWNO@B#Yl8mbMihqQORP+LtVX
z=8FcOVFNV7;2iTD0whC2Lz~Oj{9PXG-~N~P2_4TwbK73g!C)}NDF>EtD8l*cy=v3i
z%(o%qeA6PncD=$LBQunTc8uKn6SaQ5`jyiKzB(}v*(us0x+a}AH#E#~2y@rrm*eXt
zw>kR3Go{kPOiH|yOVDn6|K^*wXrXUfoD#bU?K0vpC_ZkLTiLr=Zs42Um1=cF%z2UA
zb?Htf(qKZO;~5|T*HK75abPc8j=fXf9!-0GR!&g-Kh5zDRInSS{rPKhr;r-y_vt2u
z6&4D9JyP=bI!?Mazg$*ZDH^ZaOmo$<?Q95JRUpv<8B~c@hcaSUW;_zVYT@2Qj!25z
z*J`uO)}`=Lj+2kVj{}$Tg!;M$o4N<rQ%!P5KSj}J_NCQ(=?SIyoa>Q<$UJA4Ke5Xv
zb)Lf(h^%?}`xg-`zq?ULq-7Qk^gZ3WpKoR1WH9)1sdl^0<10yOPuVh53lY9;O2KYQ
zca~db5|-c(l|X494rPybiu|A@7kX{^tCC>fLpNx^vT?WFt&KPm+7HBVFntYK;tL0|
zSAB7)b#|Bgo#Z>RD{wwM@F+kTY*^5j)nkw0huDz}h9e<ReJOAH<bNrX@W-A1^+|3^
zTnS2F_m~#hDlu&<8@eF-f#OdW7}gcn?&AFL$BW+kA71tP2!=aIihDoa{8ft%QPn>`
zS3YNKv$`YC7;byIXWXIJ5U3~kirSL#JfRwn_pu?A1T4uvYYDH?QlrqjYRrUJ98qDk
zLPG;CH33MJrh~*dcl9-rAOA4E%ii(}{E1=7SnjLWgm(l+w9r&d%b40!k&40nH~V=2
zUDvMfDCZYiYS4HWqWgSj<(%&w2`FxXa=oA?BjELe-oV5hw1;HP?_29CRt7)4GXhk7
zjUhO*4676s8ctP`+WfTj^?O6=pa~>OxZ3faqr$*3Jmq}6{2ujdDkEQ_Lt*%AOG@Cg
zt&Ptf1)2|6Yu-$^)k`Fmxeg1y>PtpldL8ox@QfW}stqQ2zwt`Knsbz6oV}jc#hC;w
z-K+X;*q?oQ<|y`qkbb<&wK&J%8e0QTg#UD7?OidT1V6iionjSPeA$+<T^{G@e&=H^
zso^}*9Y)-ahTiOVhl5CKOiB|@qN0ij@KWp83nk(jc>R(R+$JUFZoVu31?MaQei+HE
z5WkSeg;pB#6lhoJN?h#pm7t)0D@^ZU*gJPN4>4v_Ga3%EqSGp34~k<5U@maZPuCbc
zyT3Didy)!G6xMkfyeN^R+lw8z1ggCa{#XYq6FQOj56slcX6YBOul+4Vc$2L00;6Zz
z&#6@}Iv&PJql&u3LZ?|jKw=Sf7)&CTwjB;Yy<sO(*OQP|pexH<_Dz@wYX~*UU+qpb
z-c+$jwYDHN!s0qJyJy{Q;T!f*SnFmRo{x1Vr$F;iq`|mv=(1ahJD%TBf;0a;PM`p<
z2&Q|Uzu$};ywV*jqqduYHK4Mt^0KYQmvfYRAvsDNkn!=F-z`a!<P}ero50GZjHO&R
zC(6lk!jo+?4TDSjNK%!uq}00DRm^@&e=V=L`<kQ3Z&`<MdOoJTA>0s9Gc_aP)go=H
zTPbX=E?mSHfB5(7U9;HWb;d#(7b=qV8td4*z_s7)J?Vd$sg7Af^ab@sA@NgUtQt;m
z_asSYDRG&lytLWa4BQ_td&6~q(8XcaR`$}MI*go8EjmtA7Vze%h%}mQ&R|{cdwc-)
z%E%ZT3SkGW{f06{9k08$2YqY1+#V~0V@EM}P?jaL$v<=;^Wci>)6Qai=S(p45aQ)5
zR<7>HAMh($7Meuor}D(eyZE*S8qw|!cy$j}o8K)7=@+?ChKSK%%;g^^muF~zX=;2P
zkA^C{QO5wcraa!l_&u&Q!~^F2FwT?$ahag10QEplvT@r-6`neu7-XEqKL180CM78;
z{&24+jaH+~O<QN0T|#I2r;80=OkNbZCa7Nu+>>~+UR0Kwm)@!oD;#HbBxXI6!=RD2
zE5#JN8W&?7=o5ZP_c;;JDqN|b*MtQ&`=rf~gpUd&WxT!&;+#$JHo7|87%Ow`*JXOt
z@bo`d`{aWcs8ZiZYYcGjcA~Tv1k%xwl>7mbjwvdz+8V`9Q7~;vy*m5^<$0D2f-&xI
z8grIB<!g*-ps2+AyCdT&3s&USS=AtYDp+osdl2lEFV7{Mg2(X(qX(>FtIurw{E0U<
zLt--S{BwsMjb%y(hD~Y0>irRgyUwJ83Kn&UGWXn#e|%Kv_xwFT4>(5s?avS5Pb3c-
z))bw%5?W8~R}_lXB^pl``Dns_%-_fYVhS|y<rJdbw(C^U)1al>DYwTYA@f^Jh4B?c
z&6UremuJ%OaV^M}E9bnKY@qf~p&zl$KXY6G^+5K$6Qxe`cXS4Mv)FoStY^ENt-}WC
z$6@7JJd$w|Ohj$!Gi<N{>CU~QuR6TtfX<GlQp9%<CQxbBn{vz~lS4hMX4sQYl3SwK
zLsV5zkOtM<EEmA^LCcA>u*5V)(`-t7EAb+e5e03XgwAC$#0ICWuJeA+pu|}0*@msO
z5gT|(AvuB`)=s`Uo*3yn$4A8-I?vvU*RpQ?27EJUA}kmvugWaN!$U=NrF#>AA&A9u
z*UXXBYku@z!)RsW-uTd>X7)H^8Lw!PBFAPOHs1X21T#mLI)9I6#a4+sOUe#SB?8Y?
z7`(OeQO1w2FOdQ1YHt4e{)qWp*Qr@X>)4(~{`mM_5o8SWZ>bl*>eypZdEk0VvMsad
z+J>)0f%zx(*!kJuQ{OVfBTc~C8c<N)oa<|6K)l5Nb=-u8YY}h%C27-;(>oZS1tvMF
zeX`sllpw&~4W$<WR|*kP7r!S`OLajgttDv4SOyv4Aq(7iZc30xfG8og)_sFBy-#w(
z!3wiD`e#a)-6#)rnPsvTDX5HG{FXF1z2@MVm(Q%Q<-PvGWQE<<`7XA>RR(W~c8-gq
zq-Jf%6VD+mWviFcIhGZtsjXiw{dayXTuWRMI;*lCU%uvscSnfuDnWg#!xSt+sr%>y
zQEgah>0`ag?$a9~Ql!wQE=&ITqq3v?Aif~6E#~Em#O1B^Y>fte%3|&2AXS0H`ZKBW
zODE8M!kFQ?`)q|fVARf&>S6B7m=+HC_9O=<Wm&)kH=lav=F+_qLGgI&oZQo2`uELM
zC@11fta>#_+iv@0G0ferltoqUW`IgAT#8-VpdQw2L(t;RiF|z3fNr<g_otfA$jH#q
zyX-!})7s0zU9aRcQqXk)bizPix@zuvJ`;Vg9YVnNlFn6@(;fZKL=%1q{~1l<h($uP
z_X#_sIwOP!uyz8~@p${2&yil}@duXf@Qa`Y-?*Lyn5E_Fcoaz@Dv5n@0v7piqW9ZD
zK3vgV0Kai<8Xp#tm5Z;Xj}cJ+DAZ@(clQb{?M|0ZEu6SXj?jX!lb|l>ctmw3IP(CP
zQB~Y*H3e7f(oldF9wsJXgJ)OQS(rEbLk@A7>+u`e3pd_-ORSsobZ+nzy;SQxBsZEk
z<k9fx=FX`l&vbo*88UO<UNIuD6+USojj?kcf%j2n?%*5S>Odj3-?7WUls6CI-+5u~
z<bion>Jf$zaV<pnuZ@e;Xu#`2HUPVWvp+|TwHs!XT{+y`oAhKUa*^2KXPpd6e@>X-
zDQm+Nes&_SWaaagz9@ReXP2|A*<n3jZUn$5+N-~F&o_7f(b?23@x=#{fA~{%Mlh3e
z>t4e}m>;njt7ik>y6bygWbVrfCf);U+?V8Cp1jDX!OLO*;O5tjPj<apYL_+}E<k=B
z!+LL?VXOF$oI;brQpdu~9gla`{D$czpQO9bawp5PyZ5sXJ_Q)p4dok5*3+pqRjw^)
z*|y3I#@ozaNQ7L?z7_wlRFxiB83^|>TICQ0@q5afiyF8VKCAhy?f3B5FFV+O+k63!
zGW78f(gKXBA9lMvMgS$&ziyYHi;P-Qm?T8}vHgM3vk`CE1t>^QR=>^gMu8rcu&jGf
zDJoXc?m=0_$CD-YOU>wq&zWY8o7LmWpxUD)&te6km&ZmDTkKBKva%t9ITyWJLZ9DB
zY@bk06UE-XG0!K-UV0tn>O2;M>~5z$v~(KvROPP>kYA>`reZ_UmKSUet;}t2$6WTx
z<W5j5vyaia{3^<urFvXMKhJj{za8rdY7P^fUIUkr!Mx#q8Ivd3(eGP}KT_Hawn2*!
z4BIWMvQ71|>DM|Butz(cXuXNJ-puWMX@Bdm6TLc>&xd&ve?@q`_nPe}FPsDQrAE?=
zqBty?0W3G(=FV*dy?`YNB{luY#1u{TtAoC`%!#0#!#8FNKpn_})wlv?hsdk`Se-2U
zc#%uhszU+;^SyVAZJ~gui1-{ZF!vY^$1xT`$Cy&k8wjvclxKCc8?_ab^L1X7h-oU8
zxgB-56ehHg=7gn6sLxFCIyOGbV4zREoUU*YTE41AId0({x0Q>vVb;sIfAogOD{eaR
z(=(&P#>y6@(%=}D51=s46?7UI1{6>FK4Lz91o;Dp;F=WPG=7hJ%tV!4M(@+gqbDk3
zZ^Qr@(E}HV!OHb9&WmK8{Gf;r4W-&nH*<X)?NtL7m8-?Ws&b)lZwfWIQi}M<C#ZV)
zl=fF*qFhw7!15(3<cWS)VMGp8w$`Y+-P^z}T9iFf2`_SuDbl=`#Sq&H2p>>4D2Mu*
zA`hrV2yA%>_ou3R#mYUEHkZ47soeg67#<8*aae)IE;?tDI4DYk<KNG;cm1-oUKVI@
zuy0S%O&Aw{Y{G+m?4-#+9Rm$pgcXaeryG4|&>cstQ&bV%K(C^MUsuF@vm4+#$RI0q
zPJ0eN!CA7pwzr$~dAC_p8v<a+K?_YQf}HzCo$$&~P_!@Fx^1Yz{m|0`<Aok~V7YOR
zCK6*WJm{SxpR)p}`v}hhe)@(xNlXNDvBW0mYBeC6#WeOtcbZxXVucp!T|JL!ZW6G=
zfBNdH7pOFQO_4%Pg?1(|bDMz8<mYdPWQI2?<)Tjn`iXH5i!@7o!nkGFx;D<uoN<`q
zgd){bkA{hp6%E}$`7QB^o5cNKeUG`SFJB2055qX*FKw%JWsVI<umg?DSou94fgTLY
zC$_?hr8UJfpF204uTMKSK<|h(Ks1y9O)wlYleWkZ&yTkyoB2<7D|qGmZ=wxh#BZ1=
zDB#%qK*criP&Ce&HvEuq4o`czft;+NM#oB|K=%M3c)j`_gNkL2w@V*6DI%X|sLsM(
z7qvFlpH0q5gXb%Qr#zA!{8`*L9l%+)K;1_-S4sE_jy=ZTrx948-RP+u)VNu3_QO$O
z)5n<9alw3n#wy>=_T!(e1=QFhuc4WyS3bBTYq@(GJf%T>I?3fRN<Rze#eg0mleGbA
zLFJ@6Rr974ksBPAqRieo-)?$ijdv2&DW2An8B`$m)1qJ^Gas#TYfu9AoghL%B1TAL
z9ualz#PY_Sp^AtR77j2rQizIjpIa4NWQ>THXsK0$=V9(5HPXjX`<~`FcT=*Pnx(Ig
z*^U)k%=69Z2}G9Ie2`JqRqNlJEgmXxCh5Ggx_ih5Yo*p?SouU$Rg%;9c&y%t>3=1{
zP;~p1J=ZacRc5NG&mn*!mJfLifz<a>D@V+<aa~|`)bCES3H4H?6SJAMsmqxntpYc@
zI=eng#CP!`H>_NyC<sebf+n;N$SVI#5UXscIRsY4@<~4_u{Z6@klnSDZK^KMFeie{
z(?@agPaWms2c4{B3;hzq3b|4gx{StA(LC^;ADQB<NroDV@zg<6+k>RfiI?q}zSQzc
zrx)Ax+h}fmomt>^T)a<MnOD>>6Vj5W)IcrDuZTN>Ri-JLrA=f$0E_UvenS%!M_)Kx
zxMXRw@?{HRt6ny0=N$k>f%CICmDo4ZYki*->+S+VR#_po|BLwuthK|KN9f<)t!zEO
z(qPGi&3wx;2v(mdT29JYj%FKsI%=b`x@mI34D3h3_OZ2TI_Nv|GFRNY@}<_9dmZWe
z&p@SB2Q@B!&JUf9Ok0+OaKD-c6NS5s$?02_;`Q-J5}jT#zun3=tEfC;Ke(DAr$;CW
zG{$Ydl#p=9;z5(YY@0o1(Nm-U2_s%gwtd}87c`<iEzP7-NFA%y_gDr=UA)vbTfM{b
zblter{*qM6fYF6j>!?tZ>3c=&Mof1CV8ZEtw<DSO^E6Z7d4Z4qf=ZAxXn}B$unH<A
z4NnU>&i*U_c3**ywrQzo3R_(M__N63OpB4W%U0<S<(1^nWnzsusWZs2w<f?eqjaB=
zYL6qk4XVSHx78$fTM8U#QzS<n<s@>AQ--3hwEnovG#uXgbzu1!EkPO^Z=aSG;%Ubh
z4%GC}{#HDo-kh$%XBF7vE%7f89yW)!2l*&WPTpBQUgdaf(s(gnfr5@MnQC*-1lz3_
z&c&U3h&Y4Q^Xz~-OH>!oieHYgSq;#C!$JVOkyom)%4g9(-vlmxCosfJMY~qdF0hn|
zR$g0QcPmv8mQDYPB^O5%IUhHj1o+p|3Z%P3<g!Z0^}t*ztl`>^8V>JSh1F5PTtP89
zz3O(Hp%Gf_Ezg}p3Y2en>1k~F20at6%(159*zGW<?Ed`9#bSwc3@jy^a#6<>JVyhC
zyuLx}cfA*XLFMxtL@G7WSFVS88;6G)#{y&Mb?w+golE{m(pqGoWkw~2*3so^aJ<d#
zZ;8gsgY#V7vTWn6OoN+4y^M{bUdEKI8^BwFyq~vE8_DXa`@xUqLb2~gv*|xXunsg<
zQ*+)JW!7op&7x7sW;5=~oQ*q=V$}&5TuE!+TXIq^;7o0D;&_qvpf4l6RXQnIBTi`k
zbbs~x$kP=-<d0+wuYW<`o>Cz*n`=LK_L@FWKW8-Lt1Z-7Kw6?iz-phCS!Co+Y~@GJ
zjKEN#e6|J0pA+VFUEkj@b8_ODzDSes4)8g1vlAar>73Z?`fzlU6|R*e|I}G-@EvkU
zS7#+ZD}Nh80b(l??7CdR1)b~hB(Ijvz533iGBF7grPmYXrth_PaaQH}Ol7|BgFxH-
z$)7iT#Y466pxFY*&owMa-g54KdbT}NiQc2Ue&pVNG@@oPktd-O()GFh?vu(fsr%de
zpHgIVif-191`Fk08PicJuPavC<pl-jmUehRBW6H=O_5K2U?DfW8c#h8jG<h`>p6~E
zpCIU;l@algulk#cDllD-<>+Rc-L+H@(h5hF(3g}Ao-|aV;j_$5P3-4t)N!T~9uePL
zlIQE3WwA7ai))0}Ig-yaCZcR+5R!mfX&##Pr41@9?*!N!7cFDoj0pncT!=*S9STaz
z>*aY5eT+dvh%+fzYjnZPR>7$|Pk$#GKE>c7q6?b7`u5#{LR}fX7yWfN-Zay5^3{&=
zdH=7>1+ivVr03zAVyUH5yhFrTQ7_ef;f1JDtsDR|D7G~$X{EFQsX8et9tH-&tl-!$
zUz^cCdxSpou6bDuuW6Ga?-F{d{*Mb_hKf|A&KY1mQ-`Uv{NjUna(g>0f<qbC0~JlF
zWs?@?gTYPEDFu}ZI7>dp3<wXGba1F-Sac%6&tO?avFm^6?)ZxXuCO7UEl0lES<V)L
z%(!Ck>w<o~lo<<io?$4N1F6n#NwhDSM}VQ$kkc{@uKBzHyumZT-*(829Z@m%<jIp-
zx6`M;i#z9i>Jjr_RMoG}WaERoE`-(p8oD_Z#5k;;J9)r!)n5p6R4-7h8ne{&ZSQ^v
zb1t5$C(P9riDK5`Oetp`Tmcc?`t^4*d4`YgM)-)6AywKa3(%AvFCNgfNE`*~jdUH!
z*s<6WrRl!)i^Cng>caYznmS4bkJ0xILsPv%5lya67HRpbGBsoCHx%%!KEgF-X{P#%
zAhsh-rm$r0%q!xqFB#`$+h28)x!sA!ejARYS)BDB64{=4=1xF+*ZKlU^~Fp8O;bYK
zYudECK}CC%j#6=jU*k9?XFWv~YK1Xl7!^p3pT~#=!l*~m6j6Hd2<c>&d2jaJ@>wgM
z;e?4`Rvokeb59Yz<wl~A{ab1%ZV)yEyFP^peXMT;!A?$sa&SNd8m?3<*fTD8hrIER
zK>k8g<cDwaYT)s35CXzKkfK1<#g7zt&GgMW)4RzlnvT2$S0=xQKDUGj*x6w@6MuI>
z7||R{pU^2WU>sh=S11EMu5ZqQ7wN$BD{Fw&pDgI%mI!^@2S~o%%ZBja{}|Q|3?PlH
zZA$~t^DtJyNN!JyX_LVNQYg@A0dVGdF#ie^0ejzqiq&tun8N~(y7@tj0uL0f_YH<%
zYJ^z;{2V<n?GuHEZqNhWzysg+n`%FSyATk7#vVg_Bl*yV7=n+PmtQ8rTaV^pzCByZ
zdE&IG?smEqzdGH(b-#??W$zXL-dD;;{BIu%UhIg8!eP9d?f(zl4g8Z2liTBhI|!H2
z52QX~=oApOsx3uA2{~*Qzha6@NX(Cy8PRE0=B7jp{7p&0w<HeQ5;zDp9@1(QF5n05
zaid^idB|h8*c!{I7Qth067uK?;;h?#Xs_9T_RVYI-%N!L;NNdTmBg<(Grn_9qOijo
zS3*leep6Wrmjy9IG5?31KN8q%K`wZ)Hwj;--r>vYV1|{H56ulI1WC_7)E0cj6I+T<
z$3!_n3%=zyD~*K>;A1|(UVvmkxR1s2c-r}qpd0t!y5WCg^NxW)J7?&DUUI09pb})O
zdIU4he;X>C4+)zHcnUww&T^9V?@+Fpof8*9DFAeOAnp;kfu8@vLKu=z>;eCg0wjn8
zB8W9%gb0hv+6}JnMmZ%%%?!RJ^nR_a+yAc&|L1&3vFGp|!v}Xoupaw@|C6o1Nd8w=
zBbb1!y@4RPaMT^}oYfO@)Q_TGX(CFG?J)kn4p1CP!HXZq-)hNC>WR*X^lCslA#jL)
zbpCgx02=v7!zl$9LOrP{T3Oepu&Czr5HIeZ#S59<h;=DcoMv$zKN{(!2E9ma*Ukn@
zDjE#N9A<d;?~8VzLkJ&21b7t$b9$ogu&wG{-Jk|it-ruR6>aHhN+Rk~gV*SG_I!~h
zmjf@#4h$f<*%N3@aC%5H7T$vyUZnSd3g#>7*8WePr}hvq41O#iY`EIfoCNi4EO?D8
zF6&)dZ+7sak2El+e`_2Lw#+po3?0m9CzuyhN}t4ov0g3pPuwhSAw;A2b(Q5v1$6F3
z`X7}>phAbJTKo0?9een8g5rlKywFBMi*oya7Uh2y<$ojvQkVZ59G_z>@1A(p1<1;&
z<l~5ni8ZkYTl+1U4sa#&puttYmjF;Qlf&DUiAb^(5vi=}fO^ml>?QFAooeADzlS72
z__}WqfZ=;=>|XrP?qtWHP^SUU{p`T@5VPiNsV{{HHjti?LE*{u%XPacXX|pWhjsTW
zW%%l`_L?dG_326)rR&k=qbG(Rv^J#k_7m7lUzX~NIBfmG9nO)Lx0h9`bHBbkemm!V
zcL&^@ICnUv=Nv*rL{u=A#;C@kUGG5OdHQ={A(C1KZ}V3b8c4p&+Uw9ej@MX|-FZkH
z@b#;dWIX7eV05rLNT=Ia+r5x|ps%e>+!adXe|33o!Phi6v`=u*^^Tk7uc1>91nNDu
zQCdr?wHs+@UqYwD4qTMHvluP^`)F%&yvdbIv)VEc*!u*UEA7rFMqT%t!Y<E`8_oi>
zF7IRV+y3ql_h~fgO&l*bMgzB^qVb=tW;`rQR&9Ae#Hs&hs_yMtR{YMze*eO>^Q@zV
z0KMbYdO=C$Tl_C9Z9JY=7kObD3^m}w_J`<#C*<E_=%WL6f&H&~98cp)o&`|+6h7y4
z#jNL~6YusGrI{=FoHhgeN#99rH)TkM|DN|J>jE=7thZbKu{JnPFW>;^Fp%HS9Zn`0
z36qc|nS8_~kS0>X6!88g%8T&w1%8^=*93N7BsV#70ll9ROwNC%2w0m>(v2pXT#rXR
z)A(JUg1c&bX;a@K2II5(1w0RI8!gt+LQATd^&kSX>HyM1q0mz^^A62%V{}Bc!Q=8w
z^CT!@s==A{>afIfekoqB>w7F?RW3U$8*Ak1WFdU_>U=xHYO<Q!X71Lz!}Y9{IJ$kd
ze*O$(H~)tu4nmj$^c<MlSRAprmUeO-b|d*Jjt7b#O4$SL;#l=aIbQ!9P*4+NY8aC5
zI0P9__9TOLzbud-L)0lyWoEaY;U8g!-Ioj_&a*n*_Pj0_qn7K)e|&YeQ4-0jC&>J`
z(^<MMi@g>QK!h%Fzyh`|_a;^K4!zpKV%BYXSB2MuWzh0=7pCj7gnQXM?y~e9mml25
z&Rd?GsQ#koS)f^uN95@%rVOwXFRtl!wrXb6HLG5K`-3@wJv8I8LL%U|gK=)WJymNf
zhGu>GWG*9Iu&v5s>>w2{tVpwJND#|)r9UlgGaMaEwaqlmJv;3%Gi!xy^_!d9O{xgc
zA0htei<#^qD*dP42W|@ZWmCmEeEjE|6}2w=G*p@l3SH<z7cbEsV740=7`z2{29@f6
zL1Z=*m$I6!R|$^3hs&^Px-^v+LMd9K*W&SZv#i@e`5;<eaBwupCJrT?pZ2%>6^qqq
zp@ybZ&jxHU^LlflLZQ-30f#{uC0yu=TfFfatlThtn@l{5T%@kweS%Wceo|(SXbV`J
zL}B2q_%f<3^-UZF7KLtv+jqD-${8<1c+QTt3Not9CE^+!H~hdwMv}q9F^MRH3@Z6%
zPGoL7-Y+8cn%(ODu!t(Ehm#5Ep6l<-eHm|d=gXE$TAowQd69zC4JulMUP&p}YS-Dl
z-KrX^%w>A{$?JV^fN|jK;J2}$r7~U32`zJ3a$IN%uj7!bMEZ)fRuZRG<tHl;>zBKV
zr}vf1FLoo8QS<X_o1(bw_i~+e<M42jpDQ3WTs+!qM;8)S{9)Yx^;{`icJMbZ71d%`
z-B^hpy2j+%aE`#4LpIol!4Gc#;tWe@AD;DP=TYf%Li-^9&O~FB22&=GP$s4Z$O`w{
zolkh&Fp$Fx8o>0X2C3{WSR<7ot)T`iqU%&A5G8m3tOedv<L@7+W#U$kL4yO>hveZv
z3HIO!0lP&Maikt~*dA4=+GvEfy#$l|$HyYU#(}?r23`+*^2YlFLjt^e#>+;EIEmIb
zzbd6AXQ+oJYi*70dTLmJi^3|u#?rtPK2#-taiVtDLCQw?qo(Jsk`s!ibZQ{#F&;72
z={W<0nFId`)8Kq2xGN!&%w+Lye9Te6#Yb`>oUh^Bio~*RbcpJ1bcQbTZN9uwO!S+3
z3jqi5O^K?6oSo$p*Lr~q|L`Oi;MMjTonCLI+>OEM9P1=2U2N>2ru{~M^iAO#3zoPz
zbwB{j6LmR&&M*wcXOW!ez0{p=>9A7qZ{{LHQ59~E!6p6^6OSt@*d_%8YWjBR*hW-J
zJW8~dk6aIfC?kajFr^CB{dR;67}-(K#I*;r<<nRK66-lL5h&#dV4ZX*$YoP`O0<%%
z9%9Z%L|~-whTfTnNRA?9)TR_4a?M&iE)R57Ov^$xBCj@nVQ7da77vo=KbZG5W7?Dy
zbC?PUA#^vmE&e!tWjrH9>M^WMK|$dib1|Sg3mHwFz<Q+kB-sfb9C<S{2pQ=WZ_HSV
zAC=|c4Zh5zf^rOKqL~C@bL}4DGCF9Vs9*Iz6@LON`bfs~1>N2-jTcs37HE8O*CUv*
zJUN1+`pvZFi)8N48K=b$%&#!MOU;LGkcvD*96F_y{Ww=EA4&Qqz<6QlL04A~cH>*&
zU+&t51kLR-#E<=3IscGNDNWZq?w?#!2fEx5Nc}+aFHDjGX83O2LY*bl#=GO<Ww=#;
zxxsK0wT#0V!x1wPBH_6X8Xi;badLf-aV9>|+6YF2)C9+o7jLk!@+9YwPUKUxFUt&Z
zn3v+aOq(1M?WsQl0#KMUlZVZ0Nno>P0U=?e0>3E2N^5@A5f~Eu&I3mf@r~!ju7B8X
zpZ5joPGkMpWg&EQ>2YS`Euk(Ef0FoJ>_m{wR{W}93_*?y61RW#3tcl2aas<njRq`=
z$r_^b5Do>Q;n01`*g_3tYA4P7PH9Yl_;%@g?3u53g}R?7=--pE;@v8t>85SSLt$|N
z>_XMJul_?cwBdw10Wk}L?Lu!wyw917T{Ep`n|Xf=8h^)eCllTL-5E*|7x_S;*2ci9
zhSreas{0qFaW|>yV+k<^p|0af>SclhhlfWu886US0c!qNn7UFg4Qjct_QeDQ%Petp
zL|<W_2Z2MBKkP-JsbDZpR2I^v96%0B^v5w2XD_qHOUFNt$LwvtGRPbj<^N2E^wd&#
zp5DzAMOw`vHQRv8VpLY`RoB(>?n3uQQw!LB=gW{&_bO8Y*A9xQ5K+}fWWO=<dICOB
zz6)<tWi##j3B$&8Dnz(`H1*r?eHoxyf%ftiw<LWjMo1bTfPcGYwi*ed82kwTaa(Uw
zL(ui8kmZFRKG9nR9~53)41M=!23!};+Q%q=gaQn-(KrEHX^iz=NJgI_b`M6=_-x2I
zUK?+97rQ0dMR<T1JSza=_%tX7gUaMj?X6Ef$69vZNlL5hwDOtQD-2BTO^Ih8e)2q+
z)OkffKbDIWx<THES@g^ScdZp%TFVI|3~?_AIG|`64UnZ2!1FEA&M51<@>O?w(e8NI
zgsI_&(v@m#(~gFaxhl%Eu&K9@Y+N`APLi9rd1N2Sq@cDb(aVc`xg<{B*XzHEb(`{!
zr$jplN4^f;H6IKhUAS*LekX|>>&rt-6XhPKCA=|#@SvrWyYvG)TFXBY9OpTexg~cw
zGlLiG16F`>UBj^nFb_}?|NWqhc$TMUw`i~w?W#+_mkBvzz7w?^E27uSeb089@?>W2
z_UBL!sWHg|lJf?g&T@sr$?+tzv0Vjlb;ge8^I|x^MvD9;be8g0%(zh1y@U-r%4wG=
zg94RJGk?QGSAzYNCkF-w9>oUQ5@Q^N6PnNF$fu3cPtw8SXygd>WJz|6ggbsOdE4{y
z*6K97X|yl}6wON@q_3rN;uwp)<2qtnp#C64co9@ZK25Tbz-}HD7Dt&M0;cG5Is8_{
zE8V{I0oX=roXFr3isyd{&c_@{Jwebu*Z<_-QGW@x?QT#GNK30n-15xT8lb27_zGt}
z$m+GYzYf~_1pRcYoJBS>tued5FvH)@h66}gD9VTzpGk95Gp3y=T1^L7QWyUwA^)~G
z{47U>L5_4HCLl~K!Dc1n)&I##r^ok3$E$k*&lfpa7#muk9shTq-i0|$`rb+tX?Pp8
zag%Z=g_3@Z?|eBUg}pH5&<}6f;`CNBBM82)c4Wr?3u)X`Xd%jhj%(Byn?gVN9M`FY
zt}iMCufCp|<pIInpp1Aq{`yy?`EPR}<+!d$l>*hGA>4T2iRFgJ?Nu`zq)P-JU`le|
z`Cd2wky`nbGpsvk<BBaz;AwNW?F(*?@7!A4^xKnwefk~vVC-tuAa8$3u-F98T7rDa
z71WPR9%|xf`GS1hGgs`#i-e~VX<`?^{Bt*kvV8!fu{WQrE&_KhiUsS+pNV`3BVc<r
z^KOF~6%{oST&*sR$EwQ|&c7F+<~-Eqi<V{F6Bmfp1DxAfLHD!OCpLE+o8BG1TKpP$
zcCnOcS<<){B>1Z*!CtY?dZZ2vr7~a74F7NN1h#UrJzN_Z<g4IgK_cUIkaF4_r&#Gr
z8AilkLGt(Yl?JZ)!RdgX`cbAH;2gaIH+P0*#QmzWpe1<vDx%D&yK8+G_zzzrDJ3&q
zkN=c;YzL}$RmC!>3^{>-LW$J#!uAwo<PIjj1TfW@4T{dV?aOtal%hVkv?bl`VQnd-
zoafFs8I~6sF%sEXaNAty4!58IY05a@->=&TsugSJt>h|X_%&W0O}=9iuYH34;c*|8
z$4ZLx!3roqz)Y^vX?$zDHdR+~b$_f8IMRvV#`HXk`rmr2=jAMdIA+8iSqtsR^zryb
z@8L~cLB$M8v@)T7G_9&&cl>!f?7R_?D{aIUYr(Ckd*|CNG7DXyS<c%tyjT_i#`0$r
zBef1|Ls9ffG<Be#H4d|;GMb8bv5^HvmFMMQd<dD)j76DcrY7Vf0r-K^{rqUery2@0
zn1n8N+V?;VwT@%<14Igpry#(r5-!ngs(<w@N&-V#lHrUt%`Lr?MVuAMI+{kVvefq@
zDYwn1#^V`RALT!!0`H{26PG5fTVk_h;@7v2w`WU+8w_s4#>`Ik!Cwy41Kup~uYib1
z^y+klEJgPU%vd^4F)PzztaxwAzJG1^eG&3G+at@X<##tnI11#4?S`PjQ+-CnFIQqD
zhIRU1d^E>r>K$d8udiI0->~X7(t(&c0@z@Gu<dEK_M4M6@~IwY<~%lYULcZ{0kQZc
zLtNyt6-w{xk$m$fXiqllO$nYg?zEvjH5(we(t@HQB!csT$ZT*5-VpFQ<ZjM1X`xMk
zkXJgDzpnB6>ReN{M5jTugc9uV8S*W`>-~PALvH<mRU1oS;U{?>5V;mpR6kG<cz0k%
zGSbx(sBiJ*>i(UimO5Q{64E;pn9k(NMjj&6@gU*S>z7;|PZa}p`zI<)9nUz1A}H<y
z72DuZ%Y%{iEE6ZM6C+up!X(tGUpR)6H`q_0F`!YDJDpjpS{4L<E34sB978E!TNuoT
zvmM=Lauq1Q2#pgx5V>y<(xzhuOvT9H0`RHVwlM@aVUr(ejk-eWXAjnf9eJxPfLrl#
z*(>IcbTrLc2}KY(nc$`rEvrKMhcHp0qj95h_j4zDz2-(;G{phH1_rL61N(#u)55z<
zM?S7`-*}uaqu^NH=k&CPN^roBG(bCe%w6m`u}X2h^{+p_30MjjJd=|2jARM9_M1Bg
zu82_pFzFs=YqV>#EiJMsyegnD25+Ou2ZbfQ(obIk=hwJWNbY0B+EBk3v+*kPVVW<_
z&&1Mf7rM;L?ukrQn94Kv4Qh9mZ=@O<-QSFVmdF8j(hvzS4=^d8bzt<2w^)k%P&i?t
zz5)IoxQ=1{C;eqP!h>0(*_s0NF0@-Xgly^a>a?4=idjKLsjTvN*qi;IXhX*R{{kqD
zU~e{=d2|5;M-D`68d&Wp7*;UOB6!|6aGD)resYv=7VBMIoE|<=3Sc67Aj&SE%3n5P
z_2-Cb0fwTq>He&PS0!J$o(}H_-6;HV`rJ&AaU+IsK2N>}Jo<ADS8yS+NM$n(Q!^x3
z`#Fvtio%2RxT8eDocKIGaxWAgRcfp&QoH41FBorKib!(NZEx|L$g%Pra26aMuK4^t
z3!sscT@+})OQp{WKMQSmz72QKNeeTY=r*fB*qRar{2!?T+dd#lQgzR8t;S4}M~Ej=
zZkJsg&w6%iFcXcD@7;F|sWc9dL0RFn`n5{Ygxgb#*LE04n`-mX&x|*H>V)e^GsEl=
z;ZUg#Wlx&>QNjm*e?C|b2<`C2V{OCsfDU!YhhiK@O&(a!Gh=JwoI{W-SvRUvEmZ&9
z?YZ;i?%|Bh<A>FFoBjHwr%5*@r&v;&AWJXZlT;DuRfGa8;#@7Z`FCy;RE1@rT26o1
z5N~}5U>(ZRIaYIJLk*h|HFzvOth3dTy7)6sX~Q!jEX;jid^jW{%dL*adXsa5Q1HD;
z<Vxd>2!4U?%X`%LgCh#KX*>S)X{~nZSUVBG+@gz={R0PqzhF3lX4VK8=X0b8S1(sm
zgb|mq=Ge?wJp2CA9B=9^S=A&FJ*ZpB);%D!p%pU8Z!pW?m%RaJ@GXP`SU5C7y-kIF
zx&X+c4(a56;@?VG2+9YRzv;4PB+^R{SpIrF2=}moyaO%q&3|T97a$Txge0s0NSi}<
zoViV<&jQ}EytlG>GoimFj&=ZSPy<fLA2kSUm;!@c#lz(L=aG(qD6;8UHyV(Q6R{M4
zSbT)uQg^|_cVjPa8ojn?lc1ge#zyWr1bZOVfWg*FbUeIy57it8a6(E3cSV7c0!sq0
zlmG3PBt@{h>mL{3f0pNeBnIl1{6B~2|4Y5%MYewX&i$PYVu+MN?t%FB&aCIPyCIy-
z069-PPk@plyS-!sDJ={sB#@|~0p9Kds%Ue@zeoeniBzb-O{V(tLmHq>Qmu&)aJ+`E
z3GXKWeNP|WFVbBY4gN7ZFh<#*S33TxL19EZK>4t=y*tPce<8R$4YY&2Y0MAT+1`LC
z!xPaYISCnJ@<{M-Y}APO|KJUb11A+U1iOj;yj;P<{}7bj{WmsY8>m0Six&G4=yjh!
zz`XzLAM6YW9plg+`k+5}lY@$8;*P$Rp&NS-hWZ=0CO?SaRZ>0xEVskpOS`FHhQ|3x
z{f>8TutK@jF&>ZI?^0r3C*g;rldSkZP1Q>DVmG+nd<{(+b18rf2FMAj3mFCfv{a*C
z9qmPq_98%1%y~|kr<4<SU_K<B9iovbb(OA2r-w-&k@G2rRYSV`)ptWbjb!)*uTta>
z5v#+%b?p4GjVn#c0#25AopUFt;^aDN38{Nk(%Z$E($UXEMMbG3f^echHtW)w6B-~2
zC*uFX-dhJ%*@b(<1}dmXH%Rve6r{UL>23+>?iK~ylF|)Q(y{3VrP*|YgaVrm>4tZ0
zpXZ$U&NttC=9_uv{qH#A3=FgHec$U|YhBm6u3s#+@~U`Y>^`@FgN(in_a5koM1gc#
z@z37RT6;J+!XkFl8aFE&oi-O^R4ObiE&nK)6lmv0tY9!3g@mX6MJCJq61w~Ivl(;=
zk0ib)Sy%&JpYI$ksOyF%8z%<pmc>tI%Ktfb&mn<3jR7ddqQ7?M8=XMMrr!53B9+%$
zN~|(aZl~}#M>$+NM-9e4`6625@34HqDi7CMa5hQ^OBT{|+YS8!P*O6pE|#=Nw)m<7
z>t8GhHWHQfwrM&AB|4-H0Z%~mA($^Rq9x%y&SVB(uqEYaCyrA8NhU?GP(5Izo)>{n
z)6Pn4&a6vTdSj$3Oc8$M3xCl<zmN_Jin1_rv_xr&O`q6Q9!D`NwNe%~q2r1`M2|`O
z%hbQb&ks<Mwh%<oRB2zi>|$SW=UA>xW>z$>HC^**;=k}VXE4puD*-S2Ma-QOK0_XV
z<!7X`Y_`c&2?Tu2kd4uA9Ah+tf62Ht^q60aJrP}I2m8YqGys*u6rHLKI$vhq(vAVh
zIr$3%T<hj*-%Hsxe-yigCPwq2)R_GI!-L7t`!1p{{=Q3!ySJDCmU}fe{+9hZp*Nb|
zJ`q5~dxh0_n2-}41-bkv9A70^^4}w1Gl<3Mrp^Up?<LIfPXhLS4-F+&%WrRsn`SA)
zHX4ZV`YKTV^CyudaI&Y5ATK*fZ<~;xqVWt_iKZjF72;;dqH-Hg3XSIUey6V9CHNDd
z+<G=t&2zTzBg}vf+?-+C^M4jF8Wkp{@wv^ow&(Sjkkig8UT-|L?g#vy!GHejh*SPi
zOQ>$E975Qt_M?$A<{rjMc+K>Eehl~zG?+iZ`e|oSB>`eKZY#P`L;(&#wDRA|u0*NI
zZJR~85RG+NPVMN{n*B%^%T@l_Fl%0iF|zq-D%uI7Ud^Y4nSFJv*XjHRUct<fXn{)E
z2k9+A*X^ntxiq<oRiz(ek8FqHeJf07nLPK~OQWf1BtJ-pO;_n^3~Do{8E#P|RlIZ2
z^WPT9>A$he%o+HiF?(v0yNjSzX?Db8)F>_Qc22!MEzWt9W+3EyP0|}5)EXo3ush=K
zOACIF0JK3tUBnKXVbt)jO*A9CxmAxfy_M$HhM%JQG$Z<nIh)LQ{S%ieNi<Wm0f=^`
zSyXuzFCySTGV5BTJiqr<D9VSB_hcOvlu+`WS+&RbLj<pk&uh;~Ov^4B%P)VXL7a!E
zbObh+We@%=ALp9p8avJ&uWgo8T$cIgsAjnFeXxQe1cIF8f3E3DH9pI7pT%%KHs?%X
zfaxuAz@ENL&r@P$>G2(LV}>=UeMS&Gbc^~DONsWJ-FS?ZHh!v?qs;9~f99#*ki>H#
zS`NR|G6`Qf%Uk2Rz}0@LMDrcCl$=oP3ACg&<8fME_uXQk5fl{5{F^8nh3x~ue=OfB
zEh&lmOA<xdgD<Ah+>6Z?-y;URO|P>S5e0ea(QdXs<!36)Xd#fWxN;Vc<t!(2Q_~y?
z35lNK84k0nGc=VVPfkYt>beIdxw15vsbFWnKeZPCiu>x%IW3|uF26R!mT0K(gG%n%
zUa7rX?9-%Ce$vXDseyZrw%ow^uY)Lw(>T*WQx{KpFr8oRw)UeUM`zxJpip`ZXe0Oe
z*Y5SLEBe0+oKdyd{tNNk$1JbX+PzsFbGqDp13r|aI#2^yq^vESkN@2+)gz7}DAE0p
zFaTMSJ7hch5^E`*qms=Y&E=bXNB8ak?cGZ(R4!2Do}D{)?wf#5xde~uAFGtfB#H4g
z6Le$TwD^Snk~Hj7qpUdHyLOU1W-hG&`eFrzLh=RncXEiun|6NsDU}*FQhJ64${tI_
z(K^?7OVTF{>178hYJhrC4Mj3kW}_?DU#><^8xenDv$uGu7UINLpq7^!nD4SMz<e9e
z5T}x+7(VvZl|?p<tD5wDt<J^(CYdZeF};7=*X|wJOI~PS#QMf`>w@ee!@5sM!X(Ab
zhGByvcg%XW=51c8u(PNW0?vxCj{S^f8@b2Yk~vv<lCX&QwM*?J{dsoofwL@N4bgZ{
z3sTmuuOYBCC=%3)^(S9G6@u&Xybg(FWoUh^M+ZfrcG6k>9u1QUIJKK+3(YHzU0d*-
zg&7i^QdUG7`8P{jGd0RDlHA^^mLK8{mR$d~G$nEavYax~Hc#?xKE6}r!gjZIUh|&n
zVITdaIiOn><omJPs@~Ufk5oIDBK@UJx`A=Za~2xu^zD(k3ZGT`gFP6>=stoA?Iz<H
zRX=5~N?*@+9BdyJwtu2IRH3cHn5eVqgnXWP#%bZFcct-#PH!rg+GL5oH2FiGfLT{7
zcrFP+jsZ!_A5;g;Q6Caa@Gb>z4hx)q&(V@~wl7Sl`EO4wJu7JF@%rvJ^06^u-9-y^
z08YRPCoigz8>tA&^~{rc_8m1-ufYn9R>ryKB%J^F<SV_G@V5ttEB5BSNil0Ki%WQ+
za?Ki?>csn`f@~7XBkT{%rZT*bGe(T0lU*#UcuJKj!^UmZ#q*Y^5L3K;AytUnF-_~J
z&->hrcm3!NipTAg_0a~hD$L=+6;``mj9OH!>C1o8O>|y}IS{C`q0DhM(Ij46lQ!c^
z_^KAFEwfltqhKjfyL#k=pb{|a%IvXl5eqSE!xGi3)nuvs7Fo8`PBaVpFk}jtI*&t9
zrh@k<6B}DbM)vV`+xuMM)`q{#B>3?xCHY;|a*Z(3il*+|-7e7Tx>-_po+`NR)2=G-
zzA^m!s?1?N`&VX3xhF(<nwMei8MbsA<E`_Y;t=6N12;(8ev047$Tw;E<19+|F!@^p
z0!nLqn^@K|VM?M(QwMIBu)U#lA!f-#JoAw<l$hlQ$ZZre?LBQLd&DQ5DzBCmW(T%o
zuiMFXC!15*nFceGYjd1?cua8#O|;JPu*VN<E2vmMqTGuSc@cu(mkSd->|1V-!70Ru
zJTq#FRa<(7^+x;!FTq<eH9|4`DGIn{LkKD#LGllCYT;xC46b4t3_-n4%6Q6$9HuP^
z3Qwa-*VHf7Rr4PYUs65m%aVoX8kbb4<RO{~K5=|v8&=*qVH4QYfMkED0{+uc4WTj)
zb+e7(!PRfb*fmQy>YtD4w?DZf@j0!t8pb8w(M=yuIV(6*DPVp?B^wV(YFC&Jy2Olf
zgZ{*;oV1`sca!<v1xJN;Vcn<qfW*K2c9?FaIJr7_8YO)*!S;e;Wi9+%T}=Jkoj^AB
zy8@QMT5h{r+x}}q7<;@-hh40bME9X}-eVDDysQJ8)k`pyg|=l*<naBuMS;-ACzR+y
zGQtUIf<kdeyTyN_pH}Uy^#Np4l9z4vPkhE10B!lw?ft28Jt6$6FBj$cU4AG+oXO1N
zrjAypj-qgL4pUoFqQdobHM-T}Ig3tv>{p4qOxbUgN5=bS*f-VmQNJDLvGt5y|Kx8G
z_IshS$0Aw1!w|R7q?5ZK)<&^2W5KU37=G1xV3ql~$EmBBC<nn2$<xx^RIfI%Nk>M!
zJ`_PNP|&4EQKGJfJ(WqxdA_)8el)jB1ur!|f4#z+bTtoYuF*Sp=2;cYQmGxfCZTP9
z<nHe6Y^QbbN#W2<A+k$SOVNsU@Tb6(m=9(ACk(>8>_O;P65#rg?-EXc!H0L5Ioj~p
z?YGK<T#ZVrgXK$;W_);h@j%*MR09UgrYhKWGhgix+peCN>?tfP;Z9wGpKiHmM4`)N
z{E$HL)cV|)CBd6e%VaDAHIFfe;_>hIKdn?hx0hemdDBGnVd|e09;ZB3tHo=lFi?0=
z0`IVoI*EP$=Vi6;wH;$}QC--TWvpSUE6`{;KN?ds37fC7R4q{7@=Hz{dAm7Y&|nnj
z43A?@XgV5|_k*#uh_(ViRlV17;jRkISkm)I`84Vy4L5r_TBGo3F)2^AX5Zv%t-CnP
zB>^=+U=Eo;aWOsari0WJ&jZV6I~B&!64%GCB$!Xi`SP%`N7*|{DC6z*8i`7%I8Gi@
zM^6_XQZvz>7cclfg(dLW7Fz5MMYB{Y6a?4xM{zg4vU6{JT)EX0tR>Yk#dFb{26TZ2
z+_duwX-JGGc`Izf5)_*;Uz`w-bkV9JzF8MlS-G#BP}zE3cIl5=E7e+D+GAw~wRyj8
zc5uLhyWci4i!VH<jk8J^+xP|EDYO*B*Stz0ymV~oMuSuw(ykz(25YUegA?;TAfXb>
zlXF)L(>gJ=#f0yl-!wF^QN%}$`XNTudEGag1SZOSs}kt;uWv!bmzf%hqEFGQ_fV(v
z<J31iFwLkL87*;qJ(;LTColG(e(MUE>6%DK!ke(<Y4m3=+1&44-|ALnAnVG}A@6fL
z4id5-!VURvmCP9znQ1<7*^grr37H1IvRulj&7xOG{nF%_odN&s37{=>*;1Xj>$GyQ
zUqCBK>QHfisncvIT~HRm(JkPS>98@{ENBlf>rntEM^lgfISH_&IuBGb2y(U8f<F@l
zPa&b$ACS3R6q8=QsUNLH+D)laXXa|8A>@-AdEDBfzm!U@EFOR#ghR+R*A|3M7sj4_
zhno1*Fl2^|Z^tm<5oF<CCcq+z&_6gH_OZz2yc?4&1(Q9TN=xzBI!`rbL@D*Dx1$NL
zl&xbQ%?;PR=bc?<*qo`FI?uV>DRxN^_g+(Scvx=?6pmM*Kg9odGR427qk<1^ES>dh
zLrB($KJI-Mr_W6(jPPAM@dsVN^kQ^lUZKSd9v8V#L^`=kl)Q3++2|C9nnayQ)zc4W
zgxzi~xF_8v2Bs)qhY3RZGvCh3z}@|lmInPDef>|4cnkCj3y=10zSWBV`D^Hks&g+M
z3m%oHDAP!wD+l;BH0p(y_FMFRFU4!@!=dWK7eA~WKO=n356~iv9PB{fFvezP7mw80
z4yf#VE}iW!tCWgqdv4;n4$^_M$$LhWZHoB2c8KljhC191<Ne2yi1<RTry~zw>!h7w
zEEU^K@sviGlpvlKV=L0Td}rEL405RVy#5xZ88unO#6ol#kL5Vu;Fp))EmLq3q$O1J
z*yFmGYTV;4@JN#+8<D!;PNs~XV$+ajDp)cpXU;41X?FOb!kanT7*&y4EAw1`&idn~
z>KpaV(uKN;pO<jVIjI#g?;<S8?pe%p2~PWMODQYv$M1g+8GY<&p^X1-UN?K+Kp|I2
zc>1exvGSB5ec2b2&7i=;^Wj0`^I+opmpvm-7c)cZoK1<?SpIy-$_lZdZWgL}mpTv@
z>#fEuox{~qP2?TFe9&yoX`?o@P_?b-DG_ccmd4m@a%lHGX)%0uTzz8D1pg}J4W!-f
z3=gkATyXE~X@5cimC+E!dNH#X{kM4l@}a7c?l~e~mDd`a)~0>uV*s3$kb-SSM6j#f
zsWNyb&LbM@8gKjI@ZeUFy}A136PbthX}AemT^^Xuo(FTcp$O0xjRV-v&cqPo8U70f
z(-<nr$B`$ap4|7#KxN25x{cMC=vG*}gh^EV1wS{`9Cuhul<`##u1TcB^`GPjJ52SY
zab;<SZz$hU-V$dqiJJJc6QgLfcU$p~nkr|WC&<Fpa<`c;yrD_6Ou5t3t;8u>ZfR3k
zye=V2tm%a(kfP-e<>o}4^-43u_nx3nG&=fVOT>ZIaOCV0xn#mK``Co^r~eNams+mf
z*ON)HGp|w1AVUGOpt&YT_{e;H5AL*4S@phP#HSDeAC*14se*;V?+-~21JzvD)LcwN
zpnV`L^JQJ1&E*H9XGXpwy$&?ifi`5lpD8~bPd7XMaPKl1W0$Ykr+S=H%|w?(OBDJ|
zJ+6?lf4&3+m2l{3D7o82=kMLQmTW@{q=5IY+qF&GLa(#tr-84vIl5jj8jsm&^P1a+
z?ZIja#8=mw5i-2e4;-5Ur^`axGAUBoX3r~?RbD8*J<s1WZy}5@`6tF;mIs1*h{#Tq
zS8HP|M4dgDq08{=*~qUl{0yyhLGPL>yZPCd$M5gnU-^h31br(909W?T7|#V59z7!+
z&UyiXyq1fH6hti7-NqBL-77V0-6iCpp^+fqa<(h=#q{hZgyp{dI8mzKNW;#qBE8|f
z+LRziU<d#UiQAz6o`&Y7%A}T0aegSx_GNkZqQfVWygVH@UACXJ2{^)0*xhr(f_FWp
zOs3@fa~ogG%T9kL$&7*%NFnj0!%2=daIA<UN}pu)vkGGm)FXJcZ;9vQ%+x7yR`GNz
zNy^y|y;{2k6|sZCYv-*A!vr34g3Gu2_u9(I{JBNuI;Z}eVw`Y_H&Hh8X*+ek@P5bg
zG>v3*P}v9P!QdYi%9`Nuy)Uth#@7o_QsKFl^ir{1TitIFBTT9#Irw9aoM(^BEvX-n
zv$RxuZeG@{{_xw*XiZ{NEUb*uPogyq=i#%i3i+U^QE<qk04G2{-P+Y}H0ybvkz+b?
zE8IRyaNqf_OkO$fU3n<t7Tw1nt28wRqMa=r_g$!<(`jQgx7tC%8vr@8@NSWi#$ugn
zIsdh}A$5phy_2LNzsveCVXY#-fgw5x3}gR7S{EjYH07@^j>)cxxoj8!Ha*g99^i{}
zL9abx2;fku0P-S)GQLQyfXcr4INpT3@-2NxbR6Id2D5BUl_lh5`00dtu00G8boeFW
z2^#j-ewV6{OR|^r6agM+({CX3lk<Zg7-1&b8yDG3vrQ^wy|{)?{A=n>HSX~<&KkDb
zI_J2PDBlM5hc<UII0Ro$IHH{dhJ9-Jd@MLxp?p>(x|P@|ien51V%)vN>anGVMW+=H
zd?tWUk8He4=E3}ByD#TIw$(u25y&I%&DM$=TpyV+svH?);!een8`ql;r5`Lu3R`hJ
z_xU4weto)l>`%RECE-IE3v>4zNcI*MNbO`_SfI+XM%}I3{R&{`@G8r(l@nXMI8^j-
zI)|YQVK(iGIe*7tn29t1jD(ovw|~Nr2LOvKfJ1ZNY;fJmc{gxr`H8_`vf2Ae`g7Ce
zZX;jx?f?dn`w8KqD#TkZ<XWap7}O22o!=B29YiK<b2DY-@oW}K#XNFVt_tLmxQ!|5
zsVxg+UsWvLD=Wy(9nDdU$#3O|y8B|kh$uu}o$=)M!3uUCUCYho%?!O^3$KJ6-r+6}
zli}=-ztR*Gn5aO;^3&@x9j>cpDfPApZJsB0Vss$XOZ+}(4x(1<#1Zjw8A8oA3&kNh
zYK^rCkGmxRc$rfJ7Iyg2dc4lxjpiadnmfXSy`$RE<O%>rqN$~yZA_JAAc<xXmu6tr
zjMmudUYl#a)X!5OI|n%IBlkhe>0LW6)LVci*l6tl?d}5AA8906y_oBzaV?bVl-t)6
zKmk}XemVNR`QN7(yLf>juS4rKeI+lF7gPiaRr88FhSn0A=(QjA_NgdflFduTt|$H4
z1<IKbO-C+<{>v+8$Rh;soBS4vpf|89DUy2E^Mz95D*)#MR7!81<8qatOiZb6Z7Bez
zU_?~L0@No3wNw~X9Ri>whAkJH%-uRl$|P~sBbWu(>)DZ{i%5(FF{c$x>Aj!l(@ax)
z6f53G>zUmkPsFTV^gT^89ZJ`aXVqsKNa2p`8c!?(aL9uJ?nR!KcHXhGr<T9h55YI_
zw^)y41wpjR+w8gdk(QQDM%aI#pvEtqIQ<zoao)N6zKEc#H+FAz+E`Ez1zG<*OL<>l
zd~UIV<#`gW&`_?aHvsZ{h9a2ZDZq+HfncW3=d%8F?Ar*YukXo>xd|Q|`kdMcAmnVf
zMWujdAr#ejpvLwmseGESe@m6kj7n>-luk9)Kv&DV9OG;TLaO<_=F{|LVrt?Yrg%s6
zLigoGDI~T60bZ8d)j5EpBxo9VN=o-&F-nWys^%%k&Q_g9*5~Ut)==2~w9ry4)pI0J
zlI?#Z2TLXN0-GteA}DI_KjpoLhJqb1-+0oKuSh3%Q+_YNl*{(7)RHP{B2zs502%WW
z&7qiWUK7Rd53x8p*s}`am>2nDMxn)G5V3qQJ%c(2(anh>0)VvO1R&7Sk!gVcwpO-+
z8fjvX7!zeQYRoQ_zx^n1G%P+6SYjPJ#aj`kW>0F1n4_4(!s8}BF~K0=f=EfsY8#Q9
z@JAl5W%%=XoL#WyYMpQ@16DK|k~{mtVEfIz0EJ)ut>(Fs%?=y53%tV#VwcD^N!uRE
zYdrY;qK};2LNiuLV%$~;IF;Eq?FTP=(4+RY-dxB(_xQE2kN^AkUyw74m<jVAJUqPI
zpekn+URE}~Py`vjn?#Y;H6T)KRe_X>YqsMvPA(Y22w~v7p7V}c@@G9|+FUtF2KAdu
zuqlQlxZBhir$M<cvy~y2ADOyo{#Dct5tMCvhVqA>R=r}J-RX}6Kjf43-_lSk_KP<V
zY5lP1KWpSs2MBn#F7GHXs~+UvTW{`Q)+iqS6BQip+iW4l<P75JWgr2I&WZyulbJ9e
zc&Idcxz@YwB;VL)^M;!F1am~7N1?Npx7Q0(&0_XQ0gq{bjJ>x#XY}c?1%X7Zf6<6I
zvdhL}RFNPj$1`k)sXur^*Q8TG@6Os;K_3f1FIUn4!r*Vi%Z{cMb*IJ;a7Xl4pBX~5
z**gU)#zMHp=#Z_O7oqPhK`D4qvkxPtB?`ztF6%b}iFi(y*%zbGiY~f%$Ce7`Om@_f
z`9^pCtZ`85oV<)*#)cdEo#!s7+Pq3_wMpH2Iudpmot9!9PzGST>%OB>?b<__r&8ik
z?5X=<hlyA$(#}H9;p?9s&+*)YTL2u`!Ov@8I>22;nusERG?K!ibECSei^b!1RCly-
zRXAQU*`~3zWc;^>H-+*V2afYDGG5YpP5y**W?4T5d4o*WgqEbVcqv*7mFAJL5YJYv
zBxkEvC=4OKyyWDZfYD+(Q5a`(%_Cf|lqm)~5ZUBbX<NpD1C|vQ>MrQkJW(mPo+!*c
z)h0pj#+XtR`423BKBaNE%sz;pWEuQ<?%Cy905pbZ3%DP#w~tK$4rVPar$-*RaKG<;
zqO$)|QjMjt%h3Zk@`hw8XG>N#x*KNRz0v#2K0o`29Np$J!vx>%(@m3zCF^&e5rCxi
zq|lh3dyT;cfXd7nmtVhI3$9v2c0E9Q%{cX~fcJ@wC4e|p-3%@h8+K^Gh!;EE2H9=8
zgv{2gCfB$0Nc+{tm%X9u0B8xHt9Q<x`G~s%P}P^q3uD0M@DiQn5#8NL&}{Jz1peRv
zP59-r2E-nXX!;aKw<I+5ug;Sp4w$OTJ^784x1F9X6)>gB4)#?xohpkeRBI#{Yz3dC
zR1hwcAcbe6m<+p}sb_oaK(xjH)`qJ$Kykhd4toRX=3?vM>Tm|<FH%DcGCnmxOTsHl
z!W?GSER7u=R#J>#RwoCrlk$Bk>_5qEvq34QLyMO=1|Dx1HEPO*9KW9a+O1$)`dXaa
zXaF>_=&E(qPQ82#0IAl<_g+tQqwlpa(XHgsj{anADn1e;DKd&&U&1G19KBJ@jbN<k
z7Vs{0A<|M(a_It|q+CylU&$aLsn=_uD+mXGF49F&lv?X4h1+_7pp3Zg0MJBywMr>|
z02O!Hozuo=)g!dkm&v>VJHoMN-ypiO`I_K&m}C{$-<UvQKH__?Hso~Yr>07t0?Twl
zSawm^!|(jgtFIXW1S)I`Mo#>}6PztywzP-dEq7SU=vXMlg@2~)*^;Gri&CoB#FYK$
z7E&<wEjmn_y(g&$^fDo8tZA(f?$X6rJJ4e|O3E_tp|N>*g7hfh3jnzo9JIYmPs%SJ
z$d=e~?EHn}bD$>D#izJhQv^LGaKUX2vEIMIhu?g7b~nVu991VZVLLgUY_Vxj9TuhG
zdv)dr;JEueYPhQa3Y#F}k^sP=vm>f7^!GP_IyE|_*W{rr7t4@&s()!6Sy6-z2-Wp*
zV2b3SXbkDH@pJ7(@z?IjfUbhm)~c;+yg*W{XL-&m3w?N=vm?@tato`@bt|5f&qE78
z+`v+B*#Zk06SbgVme;?{BUUMNDX{1!?Chdd>a3_0>e^RiC^xGDv^<do?>#~LPnB1$
zKxp3`3%!amfSI5yfBj&Eka?>xtB;6Ky(Y7}o(3DL@M|!QzruQrRr{so{So9@<qleh
zc-t$OUXibvZ(vj-KE&8_y8JCN8{`|*=BnFXY&CR1iPI+ms?>sTSM9X6ij5-BV#s#7
z-gK3XVGYM+O)?~|Ei-qmv;<NM?;}xvA+8!WP9vgB|DiiT(?GzN+zsPi^ifsf{aJs!
zJ)=sIwKE5FytnmB2l`N0i`EYa^VZq3`!r7QZK>}rB2is}su9SEDhwK``1=J#T0maB
z`KxhtV|F#fxMk`cSbO0S1koZ{XhwTZ9ea(rE)&>uMX46aq(JF86g2pa&O;}`oK8f#
zp_A^ud?)CC(^Q%h-Zu@{X?9OCv^%yXQbb{?7J=XcO>{;Q2}J5A#FT6_0q+4)fV8|c
z`!`6b00zRC7wxTcVIs)PJT=ck+k_wO^$0(*yR`O*ATdRG&VWsYfvbRJTD0o4gVX>S
zTtLi%P1Oh3pw4L0@J48Y&o%$(3KgKZ1Jlb=a<v@!B2DS}K2(2+vfQDQZ^qdIr;~jT
zTNg+o*2MAGBQ3&&!Otf+TF<tKK!<yX2d4Q0;2P*tiOP`U$iDZKAHWrAvNh|G(Zvus
zQXsk~P=H}V`_Vaw21G4V?`Ulkvw;sUP`Cluxs7z0(;M(CEp(W%#^=a(K9B{9)CLKn
zsKdZQvp3lkE+D89EXx(T0mYwB1HCcy3B}r#n*|`Fvn|yBtS{W@WQ=xjfppM4T=;_r
zE^HXJ!lMDgiVdy(Fh01AB5YJSSd;Bft^%dZ58XoA^-j4fLm7q?U&3o#w=|F_*lrV=
znq?3a5WVyn<L##Xi6fM$AoH|GUZ3Fa^?|Fx!u=b7+t{}MLAv-9JkL9&wN!B5NGX?@
z`;p2J?>oJ4;5lOOVcE<MSgdaY2%iUdKQrP=+xSXgobUJPze2j5(}2%De{Tq2zqk-B
zC9BEejrC@m8Foeh&3&YK`AuFg3IU>QvX!Q>rZg;7>qC<eEBHhc_kX^$IPj#py(CpZ
z;9^LHi8ymW0HFqHkI?Z5z%`qKEs4*J4-f~j!N0Y$^(P~tzT*H4ALh#DqGvplCv|&k
zM6Z}03xqOc0lt$KK%H+Af4rpYtT5bvc>D`^ZS44EumVSmJRaUeN#vJxV7&tB7;U5A
z%LH-Z!5*)gh_J9I&H7?T{3QZ;!2$pFbi6>-Y@x}M-Ex$s-en{D1_DIt4>s~L<iK8<
zHUAV@10^u@QC<b)=AA=6*G-v^57AcuGdTf3*f;i;u;Gp~>bY`w=UZj1OU>utQ!JoX
z;Bb`0<Ct{>jsQ2;r-t3;m=^$d{l$I3Q<}o&9}AY)Wg>7>o@{3!U=f$eIFT|Vr)o5}
z1z=*oR|hkbdB_7>JEH81#<$l=q(69sq(O|6N#?RGY)lpO){wSC7bW}Y3fdO20PoSw
z9KT%g<laKPm%GifI4X5k_+*hL8-R&hq^^puM-sjiQ~CCKW6V40pP|7)&f#bO7-2?}
zb|cM%OIFZlfgp<!QjnyN1E5|K05PL$GaH$*=Sk5^K&Ybu$<+i2Shtz08+o}4giz=z
z-wllVKj(NC=CU<8us;td7Z4=V00b+kP*iduAFao@44CGoysVAso~uM@ayxV7r?d6Q
z#IMX69Q#!OgST5#hRlBKTe!#^zjP9y@iGnxT^1|unT42TEUg%U+4JZMU}tAAlz4MC
zz-_*9zl)B9?c4#kX_p!(W&qS%4zwWDI-#xaDH34>0J<D3!veV9fa5AXG~MLs%px^n
z;Jw3{_AR`|435u|9A4wGZ|u1eC5;V<*G~XkUDg5sM6Z&md0A7*f;lYnQZa?AM5vvJ
z@ptgAPhSlh-AWYgt^a@AjSxP3@CSi72H^`4p|fB21)rcC0qoL|e|9242Lo|dZFdCO
zn3xwB!wR5H{z)Cs_rBn>=+MYPOoUylttO60rAUPRnvmq<i%5ngGl?$%eN=X;OoHy6
z8|7#FDl87*{xjxhap6MZy#actFVItJ1Ixe^zYL-2h9Qj*nLR(&AVV5Z5UMzM4Kkq|
zsc>T0MLu{s<Y_vR8V(j`Jg95&bzPkNw9i${5V{x|G$!)D0pRGB#=VwMcY8<UPLTX3
zJg|0PJ>m5DW8}FU&Rz8eEVC#e#OvYXPbC&!<zmSB&qh^>oWF|y&HO#9Xd|G4DGKZd
z>cXzT5mpB9cC+URWCEmSPmnoI2E7!7+6knK*4lLrnI=t0G7ge4(Vc-TdSqs1K3~kx
zt+k5-x8yyys4ordVFJ&ZYr)qx({^v^AyH}R<<#KO`Mu(-LCN(G5-W_;aR91Hv>;o4
zX2@kZDiMrM@~z0EQOXO4IzGey=G=mD9GoZO7*)S@@Ep$6Cv&QGBV+pFa12G;QG
z<D%SHNbB%9d!%(ZiOw?{Bnt}Xw=)pI4R}upw&o973pKVWYWYgv9b<|OnsqiOONwrr
zOnb2dJh+l;jXj6IieA{5W@qvcFxh51Bv`l4NnurhI&m*J1JeTZSp!MOA()-V{$MB5
zqkEl*DQ-i*YL};_@Kn%%gdBX?0{Q=TWzs<r)Y1_$-#<JcJ4G3MvAfXRXfs_dDs+2w
zXwl6HDh7&xKo|G*o?-rv9X(|doH$l}zR4eYGNKv$?xoq&1Q$yTEdA#IX>aXl^NQ5|
z!OK0k6l_~QnG}xH3fp^RhzyZ<vJob<V0~#EJhTNFcRAAK35+T@M&;VfR8%;xDb${J
z{`uXWD;@b94I2)qwGH_R^gDpP5iRKL=382UB^t5Am)aag^|mtv@6c2$@)D_B6GDcA
z4Eh0W(}D`fu)w>Gj4=@gh-743e?!60$v%k1s#%))F&C>j9Ao$U$43ty)P)7BS(!Xt
z%T55qaXKEqUJC$wivq!QOPEiyT}pk!ZD&>zr=KVLyqnyAT)iTkBQf0i%d>e<D-(Pc
z$Q+R_7CKN0l;GA6qXaEMDQ@CH3lcq6UhRK-^O99O_8gp+N2On%H-K|;GD9~9G87y9
z??N#uEUe0SJE2-vyxIzCm+G(g?KKf2(2LrTm)M7asU;1HcX?wq@^MUYU#(`)wA=h|
zuk%i&PM^;SQYOII&^(X~bL!XID|f+&Vo<>3+rV%?<4G7X0-+v=2O2;WuZ)9j7Ktf6
z4_7(wV&oms2LSq(eEnlHM1kh)xifJD8Pcin<M_Ux7NBgCe|3@>1FIlq_Z~44CvXP^
z!WK^uX28_mLjJr&%;k<l?&s&2T1_6XqurnK$wFgo9~ZX8{<B^|hBP}`Ap5VWjFBgd
z6y{fp+pDf8b@Qrk%6iR%4rl#luCn5G+<;NDGN(7*!Ry~lX+RsNvx0Yyx5yvd#AO1T
zG!RxQe{>ffiU?g}>;1REj>!D<yyB5-sTh~t9K*j|cKL6`M6!V>J_=ww)?_}qiG(1&
zx;B>(D8Ov2Po0o9HbPY3M#5Zoe|l*}><KaI*4iA7RPct)pfIU1v7O7U#%JN}W^miq
zyh&X&03))4P~^}=`g8dX?y!N$bwdSr1f`d42?~l1_V;DU^#<&DXmOm@lhdF5l}d8~
zYsTq(QnG;T#z?6&BUFJHd_=aLm+)Pt*0YCfj~0o6GuY4?QjSdI2O3J4R*M^=!3J34
z+UYm4w0{aWl5_jvYkPqw)9iN>^gNo~7OG~YCN=s^(WK$ctF{*8{cQcrF(B%6Qo&jC
zkp;94UPcDzXJ7*np~rk-Jaeb{FtWMgCSfsGeySAv#-c8kMOWj;j~_Ok&z!$5>hVY;
z6B8;bDpO=y@@|{3VC;aBQMvc(Fz0?6%Z@uJi_Hj9pZo~}8@+kzG;=waw2$IjEm%O}
zz>9KE0dgNF#E%Th1BrTc6X%>a1P3N01DW00^Vp~Z@#+<br*Y*}ytxiZ_aLi?ctU>U
z2JYk%4e-i%O>J^fR6%8!j|O-^F15V*Rfhox!PTdPf^yrRXb};h0XNl$G?4rNeV=Ds
z@BH%}w&#^rv1tn#udG}gO9q9N;WIDA(HvRo=8M+cvPQq1<fc<AdtSGdFFu0CO)GtA
z(*AyzlPs17c@;<gYvPup71cL38*GOwhXMBRg!V4EdO7tY^JV`F(fHd2k*)zf+&nU0
zZvpCoA3yu3S$-^UA5N`?A9Q}&>lr$X?_95BHcr7Fb<cTyF6U?<ywu2M+BLLn+LOqr
z(xjGz^3rkVYF2(e_c!%GZooyJ`t>8T?XaL|dcy#e$PF9r$<to%GI*6_5spAa;}eGX
zd90<=0OSsZ*({X;CQmd{1rPZGk;fIWHdUC<&73w1?g(Fv4UO+2F~x+{<cL2Y6BY&L
zyiTI<rj$|5lVxr;^woWWaKAi__E(hbBM|SFiO{Rh4nOX?1;oR-x1)O{7)NUv&pcG0
zEax@0e!cNj_CMJSou{!6_%ON_e!5&`vbdm+O`NPRR=EbPRP-?>5<3GTFSD!DvpYX)
zb9bn#k2P6-3a}0}kF$xs*5en4aC_t=y|O(|l(3o4>U6g{{y>>IFtZC!VC@@e|Ac+z
z)E>jj7%!OFA@r|uqm(yzwtl4N*F)Dq;L-HU{a$*AnYs9L&-~P<XGgjP+GUwH9_`6-
z>|X1&a@*;#a_U9B@?}jdGtR4{Sd-S8Wj!~|jb>{nB?n)zr<yOCDK|#Vhevdwh+Veh
zxYLVP8s)X2-aK(BIg6U@my-gnKP=A823^QFr9zHO!mSI}|72Hqce|bY9hpiWotIz;
z$6R1=#|ZM*!X0RBA{mvnjx%*eD~GJ38J+RZZw^IuercAeYd#~NrWf`({+uf8Ek(>}
z%ZNETbi1d{TS)a2+Qj8s6%NsTWmsW~K5_4=O6R9JQ*!u;>D2Sl-y3!>x96JPmPB~C
z`~LQ4e>d&U?)m+3+F*D!x=&@<c<G`Ret&XdS-akZ^;8T?3U_Z`Zfib+w~ltTOB60d
zPtCyZTCi8M{0pIRy<?@uf!E&JN=ZP5WvR6WB<iKGAAJIwe)L6nirneKHE86ISKQRb
zg`8_O?=nqT_f<Au^gZwFnb`#c2w!m&?!C<L$u1+mm@v|=^$OB4*Vy>eUpXq!ls4Do
zDSmOfO~s;9n~)UKy(KI>Qs4N>yUModlp2)w$u$*>?jb|)S7$k74g#vpqz<gv-@&v@
z#{@DOLOu?y0#mT+D$gZLTD6qEoRRiw)Jv-~9$E!ql_iH{>>+!`eNC;F>y`VK*B<nq
z#qFv(5Ri$<KiP)1-bTwhCv-s>NKUB3&j8Tf^rEb_cf((?FLX$t^wRyYzt%g50d*dg
zqD&bp*~;2EyqafcxT&Se?lx!K@(lVqg-54UuO4D}&7<%o7#%CjWj9^1I?k9@PBxqm
z-G#jWrPZbY=MAH$INF%bG06sWx1SMbr4)PJ{@ImUu<v!jT!X8s*VSH})>x*n=dCO4
ziL=GqnNnG@^VgqmL*!D^l}&`sEle1d8;S*{8e6aGEt~t3PAdELDl{fb^_a+8FO%^L
z!c%IaEpfW?#lm_`+sqevJDl33b=F^PzZ@gzDM_i#k<RO0>AbO*Tex~)nuvXs)^Hr_
z6`s7#o9<PU7Yk7}kixjsc#OwL7b!f!nLJOA$4re*a<V&LE&elb{l@4x+UQShd#+7T
zpI3_ylS<^P^9eWsyzKB@?}x>k;VgktkU;g~bpJQY>oe%=nCaeL>6hCR?X3U)cwqBM
z+H5J6mfh@1hSzhc&gW_%@`7JNfnYG5H3NgJr1Ky;$hn$=-)*J5YUeCW;P#*hRys9Z
zE3m%x)=5S((B1A<S6YhFgP+lnFju7mt5a&lGakG4%pSS{n2H+=*0s+{O#6Ps9sEwY
zuF@)1DG3(1l&I@h6C>GJhDx07o|wLZ3D_g{qUmgPM61rSYYn67nYrYAET=t0hgW|m
zM>O}@pZbbAt@N|_F8C}0L_zaB1Jz`%zOyGHEplZ$$-i35pl(%BHi<nh!f2Vc?Mgib
zF|TPbYB@a!ER?Qhuwk~L__r1^B*MO+gMMCnQ>`ss#wL-L@HsoUw?(k=_{QG6F<cNn
zeLL5njLs5)6(sf1Lm`GMt!a48l_g&I_HC?MzRDZ(M}gcvGKalo23{b&xQ4Bu;3G>T
zl;*>@AYXzbt2M>e7KBJeRa6=c2!@Indx{+T?)j}4fw!0LL=ls4SQ=C4>Okyx!5EK3
zITq!O>$S$Y9NyW7^=q#V6zy88dY|m9L8ugUA(8IKGYM1tjXCH=jn$Z_)b*e02x3F^
zThsnnZD~>evjdYPeqX0BN~zb$EQ@}8N}*;F8I*;eB2vCx&euCjCr=cvW5Nd?lvCB*
z&|atSLFBpYA~hjw1}~&ruGaO@;3<lcpN%an$8x8Fs#x`*qsoR7-=m&I1Ruv1X-NX<
zaAwu6@K|#C!SM|0kolZ<53UdH7EG2Jdu~TA%Eu2YNxyw%k_q)tBo-8Vr<j)C$ue54
zOvAzodrEmV7ZWL2i6%KpQNDPyen|MB^NZK*av+h)N&q&f=z9M0u>G$|Cdd}=q^W}X
z-C2eRIP49X1Q!YRdaulXH?%at*u9lb-y4^bmW!$h%hHs3%L+B$Mg+ZgVFfBQXAMkC
zu1p7Y_~Dl7E@Rb{I!(s=VzjGnyi}<&=2Ggm66xNpClTY8f6Bwigd}o#%f^C!48OR|
zynie6S;Y)7VK8I`3uBry^c4_!$EI60lrHSPS|u=2ktq)0*K9vggPd;7XiN4!+1Q-0
z!2D(3a@Dl(GyI1hCTO6r#p}KGM2Rw!a8<qo5$_6PU%F%?o2U)%5y)CG#CVP=4k^>C
zka;;>Fjh5kvK=-YWnG1_u%2zGg3Dr{BKaqe(V(@1G5+n7tnOd>Ia-ds*V6W<36JFB
zM;N!y?5>Ips}rTOwN7p;dx(mW4))1@nd)G3okUJH7XM*lFxkle>J@qH+=^)u9UUu?
z;XM~TBB50{g}1S_B<u-YDyf$kFi`uY(^67W9*pRgt@mtM@A;FyftWA?+DII$ZHjef
z@O51&dRDo6ZA8Pbc*pILYHLl}CQ;XU*G;w}8fy!#W^zI*yly!>&8YPaobH|`q8L|L
ziDqmXCLF<}p<y}9;_|!Ht55{rCcW&jJrV+i4TqXimt5_L8cJ|rWC)IZvwZdY^RxQ~
zml+C&yXObiIuH{!=e6f-8!kW(>}F$a<^pkVv^>h#phZQUjK#2eTho?p@;9+|i_>`p
z{x!Kj$Ll<aY!#l90K*0k(LLtivrhlkiM%|yErkN=@j*9!a<T`P>}I`%q2ZLo;p0ba
zY9T%WVv@j4_{hgGu?-NLNY8{;9)s-g3YnqahVcSM@_W#`Wsb+9m3+&9Dw0D6biZ_j
zuAHE9NvwID2aEnTqu$XYN=i=_&EV-A!CGCXnAPGfN@8Bqge;_KDN#)5Fl&jzeL<>*
z1XjH-8FWLAF61{mYYibB#2uW4rrE>shrIA|f{md^Cp#0JFr^Nw?S8{5!U%bYz0PTh
z&=OOVZ?}czaQ<Yo#|?x?&{p(Sit?MwN{of*DAiRtcUlRUUz6UQJcYunljXuJuI)3g
zKaY}03g1!tUR0kxwDdONH|K>Y3X#3(_Oq}0WHpuGE_5UI#55;VI!(At7{mTFaF#<z
zt`)N<o;t66m)Mnr@3GI0l6S`9=HUnYJ`1DJ8vy4C(}(a&W|}mrYyW}=K1x)*KvwM+
zv7GOLc}0Tszzo2tM*)F75v2(W4s(>qEVR}vmD6gu@=|<3+~EXZFzQ&P1S=n~KkzW~
z77&N%r3rg$9IOnHXMf-rBwomp@G0>MUlERD4d=*FR<j;yXeF!ZHlsh0&U+mvN3eWQ
zMKn=v&i0c+lhrV~U#?A@G+PuRK23ekM<qA&QxGD~l!<drzcK$r6x1Wc@*GLf*;uL6
z1BDWZ?+c0G5=Ytg<0Z`V;4>XeEB6*gY_p%JHm2~G?lJzY!4xA)^L-1MwqU(zAd4e-
zio@WXb2kI~Pn=f70E0Nh-5VCEXsfVo1XU59dZ{Pf3Xd(L2_@){7n3Caa$}G2dV3gV
zU=Ky`!nxNuzNMv5#yc9geB}ln)29?Wk>SXy#1n8qY=?hOJ;>r8P<_FL^SCj_h5T4L
zqb%Y~9cX&H!^G=&1XYN%8?Va!nO{+BEs}iLl%$$l!WT*$Wv~|PTnHn-jKcrD@XS7G
z3&=??KPFxh2gg34z}`$SWo?j(*fphyA2JkmXlJUBN8B^NbOnJC)42el#%mbqQ-f7v
zkT+u^r6gb|o`PUz(}F{<?kLM$OI)P3b+}X`c>aFff9&Ew_rPrkDhMuo?+Z9Z+~FI@
zq603{<1b$^D}&=9PEaHPpl1FjBTe?+cO-#tT41;J7<54+S){punRSKdHZQRI6!`@v
z$um;J5n#?7fC7G2xC3zuPXHrhkNN-AvH%<&BQ)@2@eUtZ!oc(jBB@6u0KYfUg4+Q$
z)F72S4Vwm4aSEh!>!Yt@5G!`jMj{ih#0Je3Uga|n<hl0?@=LL$os(!VZIC60f#n-r
zw$6&%X(!V|aNbPmPUk>A0chqS+V=5~4#_fiz|Sac##0{PvWE|0|L?+_Ar8U=Op|!z
z(F2Wp`Ufh~yErP;;qS2hRji)-pJDqy88-ZFCY%4j0{qX+`+t;#|L@GaJI3c3_iI1e
z&ghk8pb99_NIYjq{h^rctc5M(g{&tHrE-8Y3ouE*3OOWV0iANNFJhhD3`C83JGf_r
zcY|UB%Ds=sj_TQG4P-|Zxd-X}t1SK+G)rxm{1g=KeFH^8E_6Y9Z~;_gH}dWZB>Aty
zdJD3rXPreF`9D|rpK(FXmu=xI_uVV{-ssOXis^IyG5Jxqs)sVz{mTPXr3MXjy{a?`
zuQpV?o$Fc3>b*}(CDQ5{v@6Xg{Wyx0At{pShsfy)I!OqT1N?sgcI3z|IO%%J&Lgvt
zA$Tr)OHrl^LNsT<0|<dCUp<5h$PJh$TQKXRq!k!PVkJk-UT?WI)$#R{G-|1QdmIJ$
ziw9m|fqLr&^R@Q#@o0t$2~xIOasiJH9`AC#`onOlRIskPfJbz=VOsC=J7nDE;;UML
z=JyiW^x5BT6nW3QYqn!($258YkUS2Tk`ddfV3!1DSzDZ1slnGeDavj(uwHjF9Uzl*
zJtiibLRnI^HzS81W=J(bq8Vk3cueo{%upYU3Uf3mpuV>aBI~jM7x9;zJ_o|v$rV0t
z!)>6A-od9={)Modp~S(Rpk06;EX@{5r@;_5FqG3^T6GgB0+MArH_pczGuKnXqXmlm
zWTKoMPU&LxxjTm}*(_`0urc<XPxTj-$!}t{O7{#P-d56LZMz!Ao(0%&wx`=vl#Jro
zP8)zRpf3k0i!nFYp-2+n`;6#m&ok|cJfv0Mj2*q{bU+(Ol6)~VYse)_KwH?HME^aw
z)9G$!1sj7C5_jDQUPVpgr+Oh|G6ekuvV&W@NFxBPV~QkxC2Pf5#%fj+H~s~vE8Xs1
zyp!(z!q|NO=E9KsNk{d3$H4}Ux#=V{$V=Z=U4qwpSLu53na}`k#^&i6jVtJSSuLdk
zXLSlpWIm8JW$};4IP`s5Cp*>mR<=v|&7i4qD#YY~F1!C6_VuEx?!++4=^hV_?$q8F
z>Bd(hSS|iFWM6Da-1^Qr2xv50wpCiBh#2!#3PzBuwg>vlZmzUxa(T^u$H6vAQL83a
z=>QlnkK|e&V}3dhS;c~yNhpuyC{jNf7qIB;!;2!0R0B+~(mpnv1T?Bbs7O2j^F(~3
zswQ-%KwMr_lSJ3bB&yk~X|tQdbINY_gpaQaOPn00u&&p{XrC}NQXQuoDUOxf=Q~c_
z95j;7`AY0ED5o?Uu@+2JC7WV2(yx56dj2nG*D>LDWm8alj?Bgf*zArM|L%N)A-<wE
z<52Tul}Ri6ZuO%+f|0OIK^;{Xi|<~=t>X7VRi4`IYwuR+D6<^v?yjNvS$qYp!bC*E
z82un)@Tck_=5Yu$f?TC0f4$b)^!&$6Xob~+7t1z@fChZ$<`DYklp2oN!4W(#CT}yA
z9kkLDBS`!W!Qmg{ADI>v!B)FJGL~BY7G#NCq0J!Y{15{|gu_Kz(ccL2rVX@gKUnqC
z$K=+8*l851Dn#t^vpP$$jBatA<cOgd@cC;$-@{}5$-Ns3$f@$*vCnfYl0G<j483Ie
z7Uia2^_r5!XoUPynZs0pEv7Xs_jKBLJJc#n-+y<)MEtclk2u7T$6YowX9+K)=XY~=
zRe9GbA=}dFe1+Y}o=V}t{RZK{mtEZgxkd3x#)6+}HkQui4<z=wtBitu&z>3!y#72_
z0z{@)!*#v(ihy3LvG?Yw^HC!7;+vYWBYYN>_W2Nx$|vbUr~|AbC~K#>%bb=OKgKGY
zsne=<YdO$W^o6@`cJ?y!oORq5;2oj<QTdt{Qk9j)SEy3{X>cNy^1YyJdyD7wyQlm=
z9wVF@xu_e^zdt1h6mO4L`@4|%-(%WV8IE4Z#xz$m?=BvEZhQwdYx2?NTjs8JJ5R>$
zW0?pIo_oo9t$S`a@FD#9UVuBdyI3pezB0T#0qrYPDG*`5ue+B1)wsCHK(2v!9!a$J
zqoh%}qQN&VxE?7XRN{JKZGm|GjKb)tU#S}K04tWA&z4Ym@=xg-PBJx?bUxEkrlVNB
z+15W6uV7(Niol)h!ZIvRDG9IY5HX}iZ-?J%T=yPcs0!BjZ}PC1XgN~qip_?#la(lc
zvxX1}K7zUi&~gg9vd9vcE{~4T62B7HGI>^Yn3rw3q{$(G4-fvuZE(lvsmU`5EiA`K
zMd0zMEEqfw6MXF~#ikP17Q?HRg|W-oq}7v59a@}NE%n5*$wlNbm%*K{4zs8T48;!i
zE@#AA@2rtHzIxC_uaGTmP~j*#`4DM~NR}E(JrGtPjWR<$_cZZ~8`jfyofO@yN>@MR
z>Zss|390UrON+<oY;*A#QWkZr`9?;v3H4S1N46ZgL8O`Ry(lnYDZIkDkm@r4wKlB}
zxdo(u8=aH9ql^3$){4L?1X+svR+TwaPvRB-&7J}A_R8M74GY`SD_&BU$b5@G_5Rs8
z0Ao4QaWE)+(zlAIo2C9HLZ#qm{>?Td-Wjil@eSWAhp<l;F8Fa3;SA+P(;OXKfvOj;
zPoF?{=9;7r2;fCo1D&qKo`(wIITkaQ>amnxor^pLK0}s+zLPWGM#MianJV3uD^aNj
zgxMeT#-3;I9d*V@Hg>`4iYwi}Cz5AebY!mfomH>!>$sK18$Tca;wj^qnCjnt(ihyT
zT*Oy(jvE=HhaU%-Vt$pg{xSQ@pY_fS>{bLvSb(=}(&ad}{r-@*Fl&k>!&9qlq@J?V
z`-))f6?Id-%%&7@TJGQlpgs`_r1FkL*~mNOzWxX6Zeyrfb{~7zN9a=K<HP6GY$w~j
z&MI1HYb;E7HJ7?+D`%N8g_vXa(iUvC=Q%OyRa3O1`kL96{D@B$nOupy)4b6)SEsu}
z1p9@3c_@>$+ywcUfCMs>3)(+!sVaHs5O0&@%4tSw9mr|KRyBetIF!ngEp6;MG3Uw@
z(#V`VPxy$6{N5gHcfbUSXuO?|qqL8V2G)X~=|TBz>{APe&YmY=t@=q!Kp`J#eFCmK
zU3Dji*&r07&V^Nn9UxLpJ|uB4#To4luX(%w?p%E5{@(t=m=*b{GE00^R=6)>O(88I
z@1i>L<bbM0dI(ThQ~ra(DtVK)C677aCgjvVP4`7fbT?bt5LA~Wqn81!Wf}k!D$<(a
z)kqzk_ayh+2;i7r)DMALA$#RQLTGc}C$+alhIA}bK`lyZkCmmw>j(>a9&&Aq2ct?|
zlpm%efR@VE^|#2P?Dt_ZN0hWxtC70f)mmI3#=O!IZ*N!QbYtHS(OIfzzL4ln8;cFc
zwNgBV*lRcHPqNPCM7;1)b6OiJNZ3dY4sV_u#M%dBZ;S71Kms5U3gOSITIMhZX5Hpt
zsOM<MSy}&uKgwmK@Pdoj!02lt@#qgw^cECer%K3T1#!zfAL?)ykI1hh`!<1x;EX*Y
zLhL11YEfA61KbM$3$B0uWJ~HXkIGdP(U<XXW8Q+#&LH>)hydgvDbyf)0Gy?1eo-R_
zqOHn!QYXzT{D(gD1<4ZTBJUn=@UUPelL0jukA0LtLmfd@#Dp5>=4LpvG!DGZZO&Xy
zt<ZuYD;D}~$wwd8#Yv$(f~_KqO1)i}kPV^6bnp?KNe)11*$pH;#tL05wsoa)+_x^$
z8XUB&)r}rMXrtIJW!zs7!)uo-pc+U~yLb`Um}flW!Duuc<l87C+C>WWd8g{_0t3qA
zYYcaSg|oE?1|NgXqiBARak<O(d}{^I2`I`eJhFKO^b*8Nd;@j(ld%F?To2vAeknNT
zp{%4VS#?+hZMAl;zJD1R75#j1Hte1u`tzcaP`e$KHIeKFo|8b~BZoM`iSqDQH`3$U
z)3o4!U1VFMGKo5tph^D+L!i?=9p1bxQ^~#h-Q1|<HQrrsKN0gZ&i`&Szf;6h+BukF
zsz52vhQD3E(9|^Zduv4C>w`&rg2|TNprTpKMa-nmt#qkD$rRqh{#cnZ4S%9qbI_oO
z*+6{Thz&C88UEpojBS^}^2O#lQ#Pd;pZv+|1`OiWc1KyM5mvR}8?#Y%vJI*Bx@vW%
zN|-(VgXYyD+;{z-AHpKC3x<aX$L31{P|ubanO!nDvUv%T_<Wa&6=b<vx^H%S+Ubws
z4|9~d-_>rVV#ayM?q6LF$rQdiRp|@2n)V<H2=1HmpbS9LG?(&o-|iI%!Z#)Fo^UQ(
z<ULqU2tm>`6PShFtv{kCKCwI-+-l!9{sY6>=PJxytoN1n(_mvLEsV9nwYqdZ=292*
zX)J}NGg@~M(n;FPmKY=#hwlqca=G@9Hs6vlu6gVSBw${uTipVQ9EUabRv$&!HbF5d
zki%+w2QqEjRVCKHf>YRH6oeg4Vjf?Ok2~|_y~-@<TyGpN!cL#q@<x8_VHkC(EbQxp
zGKm)xZ}jnS6EH5MbLDt`%6a`8bBAYvQ7%`AdXzYsIW&YvI+Ab68_G(08Oy7d)>=IH
zNUD8}Cz`P|cyqjgsn!b9zMNb8z|;%9YpttV=C*5J1xif9hNIW*TKzK)gQ^>&wk)s6
z-}`6$8$%W|H9QB@xw_;cvi%bDQ+}!*o-yL5-!ZU!6-j)q54kxIvIrB+cj*zCzPVjQ
zC0W1X)V+iw#@GRwCPg&O8F@@1Mjn$633vYTkjTLve)5bi@L*ao&tgQqUcr+#ac_P3
z%a^YC>-jv_SoQ*vtXY0_3GUK(I(=f_bQhKCR==OCGJC;FiWW%MO#hTt(Y>+=`Eq)f
z*WwUrve}g2DjVDztKo)t?gakQnZr{d?d+2XcfaF(LN;-1PKSG}-~N^X!Pf&uIlyXc
zd9fm;=?Kxj*5UZ&Q}0wIGP#&S&D6XMINh6Li?v?oF;~{p{{hNK7hy)(lnr6aANm<4
zR~BY-_5#)!BbEzCRzw+9_%2|w=~^FOZLBaI>@a7B@!~MY!|2H@_ZFc3CIi#D;gv9J
zpwyEgT1njbzu0@ruqfNFZ&VPZZb}4{R#c=_y0Jh>k#14xl#~`lK_v`AT0pu+Qic>1
zgrR$AP`ZSnbFT}$pXdL+&pwX5KkZ|Gc)!ez_sn%(XRfu*wSMcj@^ycU@goWh!7!b-
zZ`VQ;*zNOv82Y@QAHRKAYrom&=6P`?R+?+3V+-@FYdklT>+wsa85%LgKL#OEIR`8<
z$xl*yJl_Q?)PK9dwjcg1^j<{j&Hjf4|IDmi3Nn+s#%u2R=UlZ7<E4nLFqreV{|jwh
z-|J&sxW&G5dFE~>`K8|P!#eYZx6Y=0PmSy+Jw<;5Ul^PtHV>y9EkR65L&$ty;BQ4i
z{O3@K&$TE*ZKC=QYtDH*W8@}Ccq;^oDc*O3sdU}A+<Qoa^o2?`eC+uwapIhI9vP_&
zEds{RYW#szaQ=osdPH}dG@)DPL%`aB#C*U9!eZRU&qa#q9q$Mhj0W^OyjMsZ$qS*n
zi8xVzVhs+d$V6$XhbIC@+!SXrFMQRGDmg(y0GRX%HYd?YNJAe*kl=^8CrFE4_;Sus
z(AkX(+Oqg0aLbRq`(R!bS`0#3Y@aiL+uBoYf_<?k^7~>A16kJYY8}*1KHUzKyeoRE
zL-}XafBAVWuEKJ?hkK-o<L_8thXvwyS)RyC8ism;*|h13sEQ#n?d%Uy0m5oL1H+vL
z_aDG@kRyUE4>5~RAZGEXDZ;5=%>gg=e%7A%9HO#7*;+yk-DMQ}pa@vOKaB8qRpLZ7
zOp+_ix{Y_}Js$lL`NB2^cqnI#NDrB2TWG9joiYcq7*JI30GSv&g*tq_#I9#Sj(qp$
zcn%ExZ-Ksm9#Hu@NN(ec*lW<DdE0eG<T707X~6_T|4*3rHu91scDesIO?~^BzZQ3m
zj2B_c?O04iKxcGF1cO?>4Y_a6A+M+mJ*^8L7KuXO^471VEJO+%_9HJ@V1<f*=kK3|
z-qJaqV8`^ho!~7Lgs~`=1=Jz^^@sl6V!9>(_HZ^5y4aR}@CZJVUi2XTZ*rM7Qh3SV
zx_=hTFpi_rOlN{%y3Q1~A%Jz>1s0^*^nGgNC65V^LhmSqBXVvh@E!6&rw7i%e4$-g
zZsaA%IQsV!U664cg~>1H|DVbFKa=(U&z)6$P~^t?N4)2NwSHuM*z=~``Xz-#lk*xr
z3SpmLblg)&QNDGa`$b^byQ^FZ=Pqz^@?O_E=gH)mhbyb>osnja{S~`2xKX-fU7pLr
z@}snFV`tZ3@1V+U<nh^2HoThr=da;-<KA2&W9F6BadmdLV8SOBz4;V1|Mq&@KLk_`
zmB&o5TS9Omw}g^jv{13}NF-Af;^Ps-B0qQawqS;z--e%mdI0G#yARr~%ac(>FdMQk
z1RTSspoX7+rV>03Kj(*^`vgO!QJ-LoL;(U~OiTLl9NA-d=J4}4LsmWH=O^Lk>8FEu
zB&tpqwzcE?o61K9)SZ3$^alK#f|H;bem+I^<O)xHym*TGmoJTP&pyYIy?sV^H9r>r
zyvns|cpk^V_VgHj12p<UWE}c0TTSPnpyM+;=D9YfDif;}{#rA0W^spAEk}`$EK@JR
zTkuZRZlSEyHU-73cv`%Yj;ocaMPw(uP$i3a0g}TxNTGi^wEh0a@Iy`^O|2xVaJgN(
zHRLESKQQxitd>E_`R$~)kYYm1j;?P0BcKB{5&NML3$RaUB%X^S?%FXwLFic;HBZT{
zV|Ys>qMSV1Rj3HNrBzwUm2qSGnO8*O7K0oG8lNp5=NRk<$o(^u7d+QHGme>^tISp9
za*Hp&+L>ElC2-xzy!EchT=}yXq;sR+j?WG!hsHlaJ@;7mG$bUY>lv#JSpEH+x!Zd5
z@<c<7Dy}L%=ZZafyWieU_vozIY}dpU)0Wr^4O2I>wu5-&6`}HvGOSzi>vkgos=$}W
zOvD&wC28;o3^+$WqYNsqr)<8P_!uX#y8n?{ui`=JaE!QmncI))2y4dam%<Cqx00;|
zEq5hc*;qDvJhvw?Q4C#Ew}#wXkBj?^`%9fOth)A4YTtl==!uJA@ZK~TEw51x#gClm
zuFmhRl^y#PJI+hCBC~#Ws#)LNQv|*48xQr2Nq1+eiy-~0_Mfn5z!{W~P(=)BUFm;w
z48I*Vp8SnC^K(z1audd$its(ysxSB_!M!wrLC`WcRQhxp=TcP~NoS=7+IeQaeS)H-
zu#HGy(yA-w_`<vKmQYC(<AZT^!-c8@ZXVT`)yfFIt!9dW$xWB`>kED89~7f^q&R&2
zo~dPAF%jdMm+X20n~$C@_#*pL+3$qJ7;26wKVr5~Ej*4FrUhoD3vqEajeDLOR`(NF
zUSq!uvFNghkJ2i~NG|hCzN3-uVc#e$dA6*$9#|lDQT2&7=3sL#8&&SsYFA|Gnj)3I
z+w;k%F-E3}Wq!TMaznD$sfAx#n#=SekNk_c0Q6m0l$xaWN3bYgVNu3^OH$zx406Gu
zcuhu6uvQ&twYquUP4M0+5W28qySMUdV^+o{NZGq4+OEQG!AdjRpjj_+P)%F7Qch&J
zV|X8X-&$cEvLAlcu9T^pr9Wx7r&lnmD&<h$?5Yzlw!ogFTRN!MD*3(AY+@y)ztlTM
zD{ohzVG8hEod1t~`i&~DBh958bT3G4inh-)^x1yb!f<)lP!lE1DdfI;S~=lrJ87k{
zb^m9QXxtBTk6brjW*OHeidIc)qtEo)|8$TC&&~FfEq>`0r)2EXHdZ?LNrAZ{;o3J<
zarMd97H&hi$xcm?L9du1YMsRM2{buN!ZZ;>+xGMeA%!qE9pjReFgKS#v^?y#5WpwC
zd8=C2q_nt*aeZ%@LSMwbgE=R925Xcz!)0AEOPyDhnn6?FFVh;sU)AI>NycBPQI_3Y
zjdm-QpF7yc>|;~b@(SIGd@5UIgIV_?>MEtW_HkJ;C2g@$3H>Heh4L9g+YVuK{FUZ?
ztZAf3n^WFuNuJ`Ym>TEXwYxQrg}>c%2jI-xWu0EQb<EF#m?|PUu|<GdwmKYNg#^R-
zLtrk%t^RxZru!83o3KNyr29R6VY|D2*I$J3dWLk`x&5ie+u0CvoRplqva`OZw!L~L
zOh7xg(<h?K>0W<}L%h5WMcT!Um3)xMnC=vNVm2y1yc)-^=SwN^iJur%+nQi;gm*qa
z_cxW%ijz*0dy0x>5XuD+Rt4v1PwVkEoTCgn5Jb^@59m)uu*)?+H{gS*bH}rkhN%OY
z&qQs;^6VJC5hCTQUi+I)^#;<3-+=uXB;G~}x}X@|j9p7~funGwI^Nw>2SJ`k&Fc*E
z1AE5h6D@pY16rkE?#3L*-fM7-h-jaq{y$L+Ag7JNpoI^;>3^UP0<*H*&<8BO8W|?o
zE-R~1lZzWtVp`~uU;t06tDtXFRiyq>8hmp9VgU|EEN46lS)$ucTM649HI(i4|L}s|
zAz<R6DnbpG-?$NHoLUH#3xD5diGFzupLmx8rf`4YP`g=}tP+N>%H+EE=JhtYP0CeW
zDTF7f98j`dH7mgzBHD@P2qCpXiwM^y2&xGtHq{OJuxjtIAYl@7yeUz<&{bb+E|*0j
zNsm1E0v>b>SS+~{rNRlhmbp)p-a*%)uz8Cw(b2>QRRV1Rk@NV^JHZPwoZG>Pgz$r>
z&k2+0B8*RW8CnbH+(S~Jnr?Jb!6@V+hE^#+sA}-`Gw}LB9%-i6tJfYg?Un6T_^qIv
z2n^1`C?ywvxe4ORR5ejlCRkc~(%_rN@UDPzkA%MNGPKuBlg$r)QE%qAPaLqHr`oG)
z!$<V&H)Nhi_64==Ls&m2{dgn#F(CCU=;9!rvKeMJp0W(RLLohT#sh46uA5W__ncsm
z7R?hreAqCge(5hJq;}TRO{Y>b)soR2DT-61JPfan;qTMk2g75H1Gb}F;bk=bmoH!R
z@_$n*GhR`2cbW>ed-y~gX}|#*F#XjrGqWyE=c(b2Tk*|i-j8pSB%ngUd2I0||2tQ?
z)xdfcCT2(%JS!lLCLfzY=8B9Ui6MeR$x~HeQd#|54WWVTEgCqpONZM7@8}T7Y2Tk(
zKW*sNec-H*DN7oliVuOM|0>ItY2yAX!nbQIV?9%&G>I(T;v7ElrXey<BL7YnY4)a5
zirfZs9`1GcYr7jqJ;wJ-e`)%wbNwA-m=s9rlKE$mFD{-Nb%Sk-8#x9a);GzLIqp~H
zd0C8@cwhegVOr=J2RqCr-^oA^9UrGbw!J?)UbL6KYuM0NhzQc+86#$jvFpE2QWXfu
zxmX(|2H5!udIXO)AOn3vki-&kU{Ii>y6UZCiivcgW>khXmpOdW<=-b!9^F>tUS{cB
zk$DUia(_S#$yVROSEIz#&5lKiTep5QY>Wvt{9?Ge*rq|?g=~JG29iS=5EN1!Kjaz4
zA@5VMToX7g_%!Vt(&1AMH+76k*HxwZl0Ou+YhQqD%HbD5f@kmucG-}p!~Z^AUJXg0
zkA&Af3Z`yra%o>!As>B(bX$>-i8>;92=^RqJv3R=sQt9vG_q1rkP)vHu^f#b9Ce(X
zA0=THszA@3Y!tfoX1CTI7J_Z`0bafT1h0f}*4pvjEMTr^j1?<*bH@@k%@a*zeHu?5
zii^{_eh=$IEP<CRdUrC7OY7^CN31P}2PTqC^db*qJznzmbgvBk7lC4oF;{fxQ*E#b
zTJm@h%Q)%9JK8|(7sVnj<7~UQ_>lSM?hMGQ@$lqdOxy^d%rKp@)<}9p0u`SJj+Z;*
z;SoTg8yL3G-(kmAS9<Rhn$*g9i?1k}wnj)fQcm7M#w}04L>m!26T4)dG4gYmOMTtT
zwBh{^_9kbrrppg!j`nX8LDlWa@@pIYE*>kx<}NOKsqfq`se;@mki0nh6=mZ!vhVQK
ztYt^zf@1ie(;`FgjmY8DN&THVcGPl$vXze}*^m=v@0~l_(X*FEf1+$+jZd$J?e~9u
zy@p}&nwdU{Z%2#h2FIggky;_!w0=5VBJY`zN1=VcOb`cRe6!Fw*yspP#>H0}ay$s-
zb{jTe;lne$aSX2nG-8}F=h0g`weF9^?=^np`yjV=CbF1vd=VKmB>@w2#KZOP(~BGf
zT<Q@`Tx!o+nKwthKffUs)q@qAUg-;FKavIu1W@xR`<HyErBW582AjQKp(mA_9E{rQ
z+~_pQ#_CUf;|~wl>R*isRma$yq}@4ZF5;^oy)U|YQ1s?Vk`+>4W^XKW{gLigvY%=6
z#jIb_zYN*5(licUI>Kw0*_(!p7cy(*DKlkF_7yW)ht<$;o<0Pb62f`wJ^9(Aj_l%J
zk$4cs>qJq}?+wk3O-+Qr`ai_e_MedTZb~&))aF+M!M~Lq7HX`A4v{S+P|`7~Sif1w
z((0ss$>AC{2>nUv<*E2)OX9EE)m|)C^~G_SuGh$gagO$9EXCk{rBkhGf)$N!RZ!md
zN_ri6DHZr&BtAwQiq{tUC%<OYW;@b8;20IisAWe5{eW)E5|_$r<+PCF#q!z>_6yZ*
zcD!B?k(b@ZnATYOZ@8~aY9?MM3>R~n6dWHbMrOVQw)Ai=e+4eS8mdASdJniR6xFO0
zP_Akuqm|2h?|8rk2rh=z;U?HVW^u3EwH)QSq2|hJQ8)~jYULopS$0Fm@&vN=DL{%z
z%--at7Im2oTVEVbtLn|`4(=`N*(;Gsn#blJe-2Hty?u$FOOC<&z&jvf-RJ*pd#ra!
z6UymLG)Ib%mwpvO8sde9N@nlaLl&y}28-=*`K?Gp4OPnGrOXE}wqjF7xRvujI$7f9
zQeyDhXssiye}7NB>f0L6URDt=2KrJ3mnwyfEg4xcE~4OTY(}G?F8j;e@+g=kKa);r
z=gYlk)k<;Hq`HnM>cCG=&1)R1=Uas^tKF0ZYePxy{Y7q(q;!|kgQ5g30dE?@bU<L~
zehTTG7b2`dPVb$iVq~-q_(fB%VrAZ>GST|MIMUcLoT0ZuE*zu1MucAh`jdd^;%F%<
zw#}_J!gqDAb;PRWnXX=iT?*-M?oZBVI<N_GjnP6`SIwHzA$`2hI<9#{3{lwsnr@8B
zejhkg8?&W^ePtex`-d=zJviv#<TP^&YK+YNRJWGfP8%uM=Ke$BRzZd&BMe2h`s5#H
z9TlB1(5n^=uSW`fI>mGJN@{?X?)Z-L=FhXtQa(zMXg~1+F>bs#Q1=_{GkdqsjjY8`
zuv3xyHANL%QcP`Xj1VewoM=kvxX?I_&&`%#oS$u_fxhkTRIsEVI9mh8?J>X41YR@|
z?u1Bp30mkbW7L~gRVknc$c)0*ov^XWsgY#}g}<cvqz&c&e&Yn1$Nd`eLspPS-UeQ1
zM5`hq#NTxI=wamYRet91|G|gs$peW7y{x(iLc_=ls?*?y33Tyxh+;&(&c|1(Rf!0s
zL7H5G#w+yoJrElWX)+Ov;2XGKR>*VR@SJvJXx+chF~Dy-sUr1}f2+{5@dQ*|-&tSy
zu%(&*sNv6u<O{dKFAoEhY=Ma2V-2?``C->!tf#=;pn|k5O&6%8gD~3x+0e(&yGvCE
zxlk34>kH0tqA>*uO&S1$gE}x5oYShf^@ffU%BE!Md>I!@aL4eLq+fqQ>ajHuxmI<s
zr<&liodE?vlY#Xp&czUxXANk@(xEWm_V^qAdtLOMFJtU0b(7_SU&|Z-dzIA?x@a#O
z=v3z3EAq9I@t1%4@rIro3IuYZ`8g2V3r1c&gUlu|GVFT%c2bPXY*+jC+MLUPZOMhq
zksN)I@Jnjf{ctQkn<hKuG#n~@cz=L(r&!)jJ4X*mV!!qI*~RlQZ>2p}r_U#pKwY}-
zJY!?1Eq)Iui(P<n&>9E_*SE|Hu>^b7k5{MFXt^|Q?`$k-Lq$nJ&-M9JYqkESG?m8}
zE?->eFX{xtdadmSr6n)}io36TW}Z!WGEM?@BfbBl$Hz>f+zxlhsIi~L%Za8afjOXD
zkTB;}(h>f)6lv@uwone$DW_8+jXKKRan2Lr+H#-cIY}@VnPOxmACKQGQzog+Kcg#r
zR>YwxR!qIu$TMeepzEdS!-sNdBfwEK<hfMAAno<FBTdzQ#0jd3u21hk<|smR5O{i-
z3fZ-YmahJpADMZ_7}>*Ygo6wbX-ZKKh*WVKF?MK9tWivRFEp&YRJoHY#b5~Z98$O@
zf#qJHG0f6KJ7++9^uBqZ+O3+Q#G&()R*oJv8GDf!3MBKib9O`F#=Ag~HlNXU@jOw@
zN(8UI^Jq1mW}eZIo&D(Lv%NtVLD1+yK}kxFN7%wSKO&Ykefx@(vPCq;&K&X_#+RyG
z7xaKZM5QxTxghPi6S57FBjAAVKQUS^iim(eVX}fS543wgg4ntu_^n;ZL|Fa-J0^gV
zwfK2{)NnZvdH{n3KQ>x&dpemx@(IdgeLe%q()T5H2d$ov^Hg@&L&~QD-{Fsuxy7=j
zp1i7kE7ctqAm3nC;?~SO@016fTFW+%UjO*k=DHP7ZjRUHleDK+K*{TJ$P2X2^Q2Jv
z<^v2Of>4oM!k<Whd|;R88L&H8gB-T%N`D7r4=z3Zzk#0*JGK??x!4ZHTt5hn6CAAp
zl_;3fO^kjpv`rnhXcMf>$MC-Pq+4Xl3DQ#1JTbW3uj8aU8^DF4HY#2~*Q+F<vUqte
zpn2P@U~vn0XG&B>u65WvGnMlKUJgrDQ)z7-;okEi<YhqUv2L*jY-+!rXOXmC<cT@B
zbhp&e)YHRtpxBo7<1<^~s{(t(!aOl9Eq=8I){E-JHrRqoR=v47Ql9Iv>#TV1apO~m
zVg)YYU;|GoBU{FchpHK6Bc~1L)-$4##Mf`sTrNTz3Mm3hr_6piT=1ro-tgtiKUUuP
zAkh?<gSG7^aG?2qJOF#MDX1yh_2W(eLqs!Q-^Ie{Kg^=|CfFdAn4ya1!fB|%`IBPG
ztfE+~cYZT28nxS{u8Q5W>O&<m?>6r>iU08rIKwQW-um$2!w(Ptt78suCy&wwS1r|_
zFR&9FFEM%%s$Uhzw3c)7h5DTm8Ye>d4AIlTHWIcq|NhL7lmJgmb4gQrAqjf&w%sGF
zjUXKjO)}K@y}%FwC)$2Vr2?3g3?04^dh9OOFSJUez{F+rCU~v9+GBE}#6@L7J=uF^
z3obRaz)DC$bFok2@&En4e9BsrA60BCm)?BB_~NG%6apRAsV*_?LjpHauvAT6=uC_D
zBu)y<q<{{HQlF$wl=3LBw^c1rI~5E+0sec}bm6%)b(#noAMr+|9pQwmt9!e<xYllv
z2Ak4Ap3pZ;B9SuUu-=7HHa|SfRK#lF-8jb<iAZ=mx<E4>;j)`CYVF@C1S<OGdE&a#
zlAm1>`PQ2~BoyNp6<ncoUnsfR>=AbUJmf5zdoC1UtFxfOu<J)53B?%YFR*i|StFG7
zn@n7rgp-@hStgAxI>#^<HalB8osWHK>tp|H8EE?!-ZGu`Q#l>cOpMwxTj|y|&H+l9
zi~Y?4RRuOGS6c;>yUoNmfM~^%$lK`@F#Rx#JHORcZAP{`a-6?`#(It$w?Ra=IwHFL
z+CH8#yYJr>Yo2;1VJmLp{+s)9q0LhX5&Xt1z8NHSKmnXD@~gkXvt+ONA<d&^lS)sU
z>yRXv1+Krg#2bj<&2Sj2=?7|3UFFMXyZuM~lglPQC<7;*%pA*PfyU&P-?2CjRTj_H
zUZr1rI;c+-saS&b?-ZT6pmw&h6JnUl?5p-%g)G`+ou*ofb{>H%&xVUG`^?mlCOy2r
zj*J}J4n2lniReTM4)j)7S1Cn-aKG2}2CvM<pz7$M#Ac39O0rTopd2cYvPGC1Uk$H}
zetUkEz5A7+HQ(jAi01T_k5?Yv8@g3+Hd43OM)(iu#OW&!{43=8vjN4}!u0RSiv=H`
zD1QrL^h;hCT}RnG%Zqy8om(3;9xYV)PPFM~;Tdv~gf~|cvPjo2ccPjvJ~EqH{GRHO
zw(4`SEoQD+!%ko*7BP>J)B6k*KaqVMc(9%V3GN^Zc>M#3SiRcz{X0Khd;ZKW{>+{*
z`vh3b|7`dB(|;7g@>i_Bm)aXp5|~0BN7Te9SS#OR9uva)jh+G5+@_+XoJ+I#9B|kK
z987YSC%ac}6+1@o=-zumnD(!JjzaV^39`ub@E0q6LhfI)q*@Fye%O%y{BM>MUd`2|
zBXK{DJdA`n4tv1%>LBh>j^%4YBBVdn3Gk(FPn^-;_GDKO^Jq9w0Qq$*m^@PYdgA|@
zM`z*RHwb<^BF{C$b0XTI#s5C1hnPqAB0Z6RQ@|`ql*<Z&-}UgNNf%{xLKvhdku^wy
z7*B`~xdq<Q{v{SwhZ|cHx{V8d%61MEQ{ZLXU+qfekO&UL7q^lT&xGt(u+PWd|Cau{
zu28A7Ccl?U!55iRHu&z^{l{E?+uT&BUN&(bXvx{$kKF3XvoErpqFd-MQ1skbTGo6w
z%>zGstA$UNx|`~e!>^Dv;A9M!pND>ur7M?=<MFxfq0;%PHo2fM#EZSwqkWV}CMwKx
zP`F7H94~m?cdtP21u5OVx~4O5IWpVP21c>#OJZ(^^V`O#8hS5MI_@p}E0UB+#>W68
zeSt5tl%o=Ihir16=$eVxaj|Sh^DWReBQp|v7B1lBKAj^Bm(h7W`vr1aWOR3dswDR8
zEpSa9!=pr2K}Q4q+tUvei&byS>2O|6qakr=#Ngf>COC%7oDhBpDTd5vPeqDICg=Js
z(hONY{EQjO-b0m7a8_1_0{@LVwL?@jaO%)Ld@)I1wys&B7g?6QCWtr@11q0CtXcW4
zT5s7EQ0ropfeVvxTb#xmIqis$Eq5x4Z(HyF(r`upSCeG%!cy&$qH;_xoihtOO{`T-
z4T|WJjLe@q;gWfuSAO5|>0hfH+9~pA(6imPj#;mp?~XV0e7+zuC4ZaEe!sj?n=SNp
zICnOgRhZ_YSLwOcM=WsP<AhLP+Rax^R1PJwY?rHcb!6l|4jC%wfux|O1DBStb(n;7
z_b^2x=NUDlVv7{IrrkS0<Doql)1MARO<Y>pAHiH>H4$ftj71zpg$P5LAzFjgTT&Bz
zH|l%VbLovgXy2EGB|yx^A2tp3v&k#OH+8ECCY2dQES*P|vrR%Ad7nsv-wvwvnQS#^
zmsxeT_`Wwx_Szh!-dpd!$}Hn`o&9BKmuaNoB#Wt@6&a7g*Sm2`RoYScrd_v!v$*s_
zvVX#M^(U;1Sx(m4uawg+%1&G9Zv4LVvcha;WZSGCUlWZg=c`)mATD`bfA*>Dd4eW}
z$#FsjtI>iR?8~C@h;09A1a8AE`8nKO{;K8J#t8lxp5MJ6s?_6cw$}FCMmRkAI7IbF
zSGf6l_me$cG`aVm2NZDMnk&ELaDU`l@Qi?(Zoa8jZn;>I6NZnLC&GBgjTI#$;k40N
z!eze5WHc}C4VPKP`wP)U9*K(l-D3s!$|;dkmXEJSj)_aCPz+FEx?AbB)q-uSqRZ>8
zqt!3kEV<C&8b~vE_AV}oflDnTy|I^PA>>4-0)gcD%l6y&Q5SeQRAV(R&M~gnhsNDS
zug++!y;rb$a>JW)Ei4x<8BzFGu3Tx1K3gWm9CIj8M+Xsi@PMnGW?8OChE);QluaVG
z$-0BFb9JrPDavCZu>`BsVAqwNXt&t$Ub#wALmNF#;a5Gq5|v}Bfn{Dx4kz~KLfOnX
zR>(Zbe0_?CVK(P0vLXKzBF^-%C{~NvdCDAnE_z9hKdfY0=?3pe8P?V<ROxF%<K~=I
z<#UFL6}o3NR~~N62;i&HeiU$z-#7YlO`d{Tt!G|WGquAVjc(Rdv!$VwKj!z7c)qV#
zUBuAojh=`-V-(TKDMSvYAVU~3nn_ylEIBVh=(#<OWmulPEo9r@mGWIsg3}Mmy3Zu$
zQl@Xk1<PJP=0{8xD~4`Lcl0N#{zVm-R1FuiaDGob@|i2=UgwOq!x}urA&z<bg?<IB
zlc{D1)70>7Gp_^A?T5?v;2Ku56<v7wk{^(?XLvMQSsgr?`;_|&9I(WpPj$=6SJP;F
zrUtq4wZtSp4-(>O6EjIZxf`&~+W!3;YiUx2;7R1HM<4+MTp8o^ZvGUvdaBaYeA(8C
zX`s{G3UMqPek<seyG7vy_lK?&)igmb(h@o3`i`HB3F5+qu#NH~99q^x{VvgBlZ?JD
zvBQWj>>BA!UY=fibwwZmR4FdK$WvcechQrD<EuKtl<aTn7SW=fvu`<<x81(|V~pQV
z@lDS?<~e#Ie4kUB6OqQNdyn>);#sX#6R2g`CO`><=|47O08e1&v2gtpy!QhpR(lRC
zc<5GB3`Vv5cso+kAcDrrx<CP@KNI>aB6*j<J5XW#P=4br;sD%Cee7>K61H6xax%hO
z#~B?90n?@(=qT}d&_9TN>wy0g`||6c_z!s9DRuX$n>G1vR(DkIU4ZXt8R-F0&<X-V
zn)_=fT2aSU@76Asr6!+pPDN+kq<!WpJEr_Na$f#kr6ii5mCMf^%QV0qQ~xo!_=Xs>
zn9G_&sr&TZ#9NY5&~}?hDChY(iPAUzrX1S!GPdkIJ83=3QWto3P}9y)?~c3~Y@%Qi
zI2gq;o56oSUMn@sgid5UaN4*9lh_;D6Tnmb)A6VOAm|M*@b9i)dlxr&t0w<L)~|$O
z>WDpcYdILNH99sO%zFAmr=Agvvz*%eGw4z=^2envufbs8OkHVt6~yBSwP4R{LeDk6
zJf?;oDpg0ChzSd8fnP)MafZ`m6Q@&ire@Z79>R<Ybys7@J^ek#ksHxKthBcW+zVSP
zCtlHTB=zUs-MOVhq4{>M7Ev_;NtnfJI<C6kEbP9*+s5c7e8AYbg=hS70@z2>;XcQi
zI|c4Zmtps*27=$!Uw}fgJ6(<a#|H5lFtKxF8kV`P+k#(&mNoWZdoC}%n7hk|I^qWH
z?`;Rz6t#)d`JPk}hFX`S8SRkZ=B-jzaMpwM9{ViwWQ6+DuttbDX88ZgM}g|BZJY^-
zp^g|*<IsyTgRu%GPt-wg&ccJFuOVLR1*Cp@YqQ;NWITa=3VbdPNOLJ#IqwAX*!Q8%
zW-gExx%p4rB`lR@f`=V0q32orO(SW8CSU5q-m_C%1Wbw%IC=Cn_kMp+?aDXBpn5jI
zgP~XHRVrnQ1_qf%FjL=eh)hMdS@)=EXKHY^7q#7aF@X}^9*Kdf4VrU3IqGmL*7$t;
zUE!0PyHX?=%SKGIIs~j$&W1&*u=99kM6X_;MV9H)V-i><6&Pp@iitSXPvjAVBQo`h
zGWYZaYRXODtW}j~{{q_3E+7vSGN~b07^xcBFMwOd99%5A4c&%Z`pJrcFH`DI^>Ls+
z?YH#x`9Z6lZc_Vl7J~oz=B;)>Q;mw%C_`?s#=d)NvHZ^AT$)gwAb-=TE|fJ7nBFX5
zcP#swJ=?F?7|xoC_f)yq_i_i^VftY^DM#VtNaGua0b`$Z=rhb5NaS7EtHqhI@83;0
z*?tD<&Be|8Ub&HGKvxU2w^7~-N=iXej*P@zuMB%(A9$~IYft@yxl@E8*z_-R<8bw*
z@ox?+7DGyY0OcJ|!(_}S8OPtZfp?ZO`@jfU<A&yYoG@V*Vq^H(dCE#X3Z?#mmtSUc
zBm-?w4rV0RW8j(90CJ_9Z}y;y!hm}6a}tc%`K{TkydI#)E`mGH=FaewU%}w!2%R4Z
zJtH6oSFOMUUGPz9fak^=+;3^Y#CgV5Mkxy63XYG&GF*oq*E6qc%=!oDddB^^qz{$S
z@(9W3(tY-}mu0T1*XtKqU7%T{3Aq}|Afx~#2mYC>I^{osa&#=vfPJY>GBt?-F_z0>
zOZgnm=0Aaha1ED4#8c{bGxj`i4yB3m#I)<-4xTFA7q))a>^v)bI|qo1wZKmyv)pf;
zS5%uW7Vouk$1}QE%x7moL_&}#1W`B9*106&JPFtwnS)(*U;&-6WXJk>a@f2fX)}B6
z4sMEhd(jxqF~4ei{55c^(f9ar5@Spzew$^$Mov4~(i{or#Xzc`<pdm((^RE!S9t<r
zglQ5LdwDZje*<{2`71Y*_D<1!DzP0fJ1D?5Vf@vcKDG}4S9<@oZP*!|KzBQ?zlJqW
zm_g6wl5z6HtaX7`@k;`=Xk7?%)_{H0(0&f~JNM;hgQ?1lp3E+zm#tHpSLdot7Bvek
z+EIINuIW%PUJxO@2Jgj>*XO^$Q&!|yD4~R`>NQ73M8_kmx&*wrsZg`!qTX~?E?VIQ
zDXqft&lQQK@?^vvxNijBC^Xu2!RkQTL`S%aRopop6q{p9(T~KJ6UT`tey*M!GcyKq
z({^=LDbtb}xM4O%37Ee&b`-d7o0<|Z2OC@Q{f_@RkZQMDQ+iKE80sZ{Ol15Q3m_WJ
z{2%ZN-KQuUO_5keuc&u@aDTqY7(qnIk}c2by;|soQSuY+ECz~C=7Xt3e^UWR?^<#Z
z3-F6BZ+K282CUXG+J8Ur*;pKEOpx(?J+cf=+AJWP<&>5wr6IvIkSybFgXm*VoJW}h
zbWV#Zn#K|q=kpJD7BChO$Hfbj*L2(23Gj{Wdt3GwX1t_iwOb1Dp1f^;$)pr+;%Ul>
zl{2X^tKW!NW7nFe{L9>95=I;`ywskNBpB-C$!k-9=-BzQ1|Oi0#6Vp{Z9pKB>Nc|S
zFHi_0Yj?J;K&3ctvbi%{+sN|3XMG-fbxh3RvoH_zdthJKarAPGQ&e1WbDbf$JFtGr
zKchwLb5&*bG|pUogi;k4+d{m?2VyYa3?g=ex+W4^y*x7l#F)hC$F@iJJLh(9VhkS_
zow70+3{NlJQ59A?8jc64$rgA))Ii?-8o7WY|D_TqJ`=#|(`KDl;o3hHovNCU3;e$X
zMc*Q^FC@l7`X{exW@_AVe#j?4=ndZT<rwcMr{=W6CIKK3b(3HsqQJRx(@fU7gYd{`
zsJO=s$1W%KO&$o3uTKP}j%2+fCA=Jc_;N%oLj8wM;LiQvT_qQEIuZQCBI&4d^Hvs5
z^}H}9AgR6!Ac}=0E+V<NG-WMbo>-fcT3|tk4GRC5g5$3A<CP^M({C804M(HqyBgba
ziNRQ4Cun{MW@I;h<v*B7i5lSA4dpkuzq)P^j@0cj6&@vJGUIX?Wg(HN5Clr?s8)W$
z5QLx}%&ITGF%BX&1zErrouf+DG|AYERCTIQbRA`E^0RJoo(9KjcV#F^?qsL><n3%j
za84Cfg~q;Tzj(23cytb816OSqdU6JIBcs$NOGbPK_P=X<r99N9##V^gq2{zD1j>&7
z?PLHK8~{-7fO()qdaA;ijm_!zx5MrI0s|q_pwv;#J{s#~1A0^s_{^njAt2OvZ6Si$
z1|TTkUO)EHLyWoW-D4Z)4?P3?Ibau9@;2>3gJf{M<kx8b;SKnYuTNO4#7(d_ekfUL
zgP9UOE*Xm4e)V+FFOK1xKwvf?{6h>OEEa`U5U%q&C_=hSiYg8h>=<E^yCtAaRjf@G
z;kt;RlROh&^DQ1X!T$JxyW6?P5Mg~PGm$1u#ll@_C5GH1eekj2Ssu^a&(4?rnq0vE
zB?0-jQ?M8^BV>ZTlJdvpMu#|0FTa}cNYRdp?^2O4Dbv=7oF9$8lEw=!+AcE>#yyu-
zWAxZBMnT(V!6KfKd(76(8n0q6tZwzU3O!gRwjHdxQYrUsc&TYTq{213U(+(}pPBK>
zw92`kd@g(FtUBJECH2RN<y1KPxIEi{;*7Pzp0uG<1wSsm9+Rlt1dj@{>^SO1_Uly<
zhjy+*5y-smiR5MQC?f`p|0j2)Bkv9e@n;tumQJsr#Z1@8p>A#tql_)aseuE@E#D8C
zXRBx<(u&#}Rk&Snt1(3;2PdLX?!UiNd+)XOqLY5piu=rdef`g#y~y_cMG^Oj-<^SJ
zg|SImw0f4qFWnicY|8T=H6_Vg%Vgqkv3ZL1#+k+?dkl_uD(yEaT$zSV0uKD;bDK)L
zl8q?Icf3FLEb20@I9*5MnTVa2=5wurF7F9_CIX+!ItcGVkx3Fn(%?Kqud(ISuEYmy
zSj@DfWNIhH*X>@BW3HP1_(&($C><SNI)8?iRm)(2(zZ95UMACzoT&erIF;A)Qe!ZF
zPGk2hy5-T9{mRR$W}iKr3)&erak9Y%lJ{D>y;b6@obN3*l@^(`GDz)q*Xn(BPdp*S
z@LBxUT;EuQ-Zv{fl)InSqFzREcz^$f2d+e9Z8kKo(SL$$#qiTd_ABeto8t>_7Nl9l
zAK&iOSJsXcGt>}wvn)&So!`9?tRU1;nY`2GX<}bdwraRzw3j=Z)<dh$9k<q7!%OC}
zl<iq`(6|4mg3o{AkFFz!Hf&%+U10{;Ueo}rlRq6>9|wrK$zX?PvU)EkB(jDw++wP%
zRi%6?hjHtLbG!DxRj=|aiJ|K$lAimpJW9Z+6u3FNt(9r6#bvJdkKbR4yv$RQHK;<9
zo8X}LN4oQ1J+lO()M-@U8O+?GY!Onl&voGHzSB=Pp*LN!P-VICZpihgP=Hu#wl7~~
zN$gefpN|4eajYkNhPMlZS^Ki*Fot<|xH&RfYoztOdLI;Re-3Y)<jvYA-?kq3W*d#R
z%4~89jhD>4Zco?oX3e;ZY|W@`(08(NcO>w{ln^S-ma!Ja40es0BfA@nbx2o4561cq
z4y$}mOj;aV2qwdi#+qlpr58@7q4mY&N7ur8J6G9%f=+S>lS@$9LEZXn6XlmPsiY#7
zE!$#NS2JEIhxhM9S(kOqCY=oqNXLemMXzmS-<&Zoin<`s5$oijvNL72{WWUc_L<cb
znZ&@ilIgaj^>nv+nlH<?WlJ?Xg5?bknoOGQ(j)#;26<EYOIP;M%T`*cQ4HRF)7dDm
zt)0lZ9p#aMu|EIbgLzXu9c7VR>MxZt1#&~Y_kO0bcYk)Kkq8V}0GSi~p0N~hQ?{Vn
zAv7GA4H_YTSTY#ZBrz)MS4fJv;1u44Zkd}8E^M04j=E<PqI$5caj-M(`zlvg*aVxc
zF5DUJWY(B5Rz+Eob}fdzXoKvnvG%&Sw`-@lhv^u%hwFYpnr00_KmY#z_0}G5tul(G
zdwUym12R?Fo2J`i#jNwdSJzl_cdL9tb0Uswy;Vihr>3%(Y^35-b$ZbmJm0dJ0dcQ!
zRBpU;k@ql-OFSw~(^zCpy!oqToh3VF#^{XziN9@uuqW<#%r6mE#lG7#pKFC_V^H(e
zzBHEi&Vw+1*EOjN*CH+Os_Ju3y>k`FpMQ59UCwAp8*#kzmGVbHMx&_Y8*3En=~+>B
zHmN!ZrQdqd6?60Lzt(no=0aMRL~Uzxx(4~j1Z`-F4@#F<_lg%px>@ukDy^8a+`7`u
zlIN+cQ@xCJHm`?!WQFbFs#}`^A0NEj(-f3%-=sFCiSRsV)0e7TdaR|>62Tc`RiTZk
z3^;Q}nEf}ojK!0lpN<bq+%?lQT@RY;%qK6aqnlUP%qL}NDn)`(Owx*rzZIb8+!(oJ
z@F3KHoFRUW`+9^bP@%e=g(^pPT9tXy2cbu^@Apq+@u4$+YnlU2o1!l$zjD3PdcCF3
z@7J$xR_jGxr{|R;QDr7Xt{uV&=^nmYJ6s=1f{fC4o)xnUW;)tO7Sxtw=dGvhZ!EOx
zL|OhaOU?stKwt6@CpqTg==yGDn#zy(j)#$wsbX`J(-W*lDr!1DOZ^*ND2*GQ-}|$J
zeU?|~i#Dc>G+7MAyz<CQ#JnN~8^S>eYDWaiA%00%4eAq9;J_Hhn<DuP_v(2`@5wN8
z+}7tf%7sE1GE-rkI8Fj%W>^sQ(tEPq)=q@nLUc5nv#VjKm0;LEKy1Wj)TNg2z`Lhm
z-okq)XrRbq?p#ztYpHw4?5Q{U{5GlTF`RwBB13z-BGea9OdpfHe}7@0UAw`*qaE|w
zR&d7m!@Lm2Uc^&hJEHs^hf3Vty45oa%H_Fn9yxnr+0||l&30I`&*Bdcg4Ugyh>J`4
zTIbv~E_t5yZr>*Jl(&V8!1}sC|A2K&tt*&Uat*<Qa5r-`AF=?z&P^>aCw1-PU0(u1
zwfrP34#2GZed4mnFP@5-88t)W>h>60#k>7CC3RQ&@WhJ(+~uMqO$#5@zt<S5Ddbmt
z$#b^oYy%NZgt{gT-%1E*7m1?J9H3oX@pFkWK)R=~4YywvKMDC;qg@&5Nd5=7B2R@+
z;qDUP<NN%Bsmzl$!EWR|9l;~U`1l^5mcb|fd=m*6{I`?s+=aa{CVCQ^{=z%m)>`U2
z_4;M$-Gbe35KBc%C`y|8e>jy!H&MG-31h2GM)oeG-lS*kncR~fg{&jclG~r6huYgT
z7gE0r(|}cym)fu%h>GQ*=6C`gg!zT-4v9nW9mL-{2)Q+iZxaT4csf7vXfkACZ<~n^
z0V&L-nWx=w#Wv~ep?!i{1a~}L9G^gblLX??BH)0Rm)mdnD0@7NuH5&?<tv-dAN8om
zhS2Ij9SEq6m>yGBr~z(vrU4a(NlG{;GP7QK{3IA@9&y8qJUEJ@K%yuF%NMhC%Z*~R
zY`Vus?v*54Ih>DWCh?1=@4nlha7#1SZ05FPh&h{s3N%4!Oa&irRN(UPuO|$qKUHx=
z$?%h;F3x&?ckA2Q+X7||2Y_D8Rxf*_%O@^`STmBZq`{H}xNsb~y+DTk9YZwge{-=v
zQgC%}as7@pn{}DmS<+0kIpd=CE3+|zJ&V2n7W?641t7zR&a?k)g=xBHB|+Qr+my^q
zZrPF+=AcLeP&FFaI#8X9$zX`xv3M&4!BP}D&)frufZ@Yo=49^NDkWNeF8X&<5PP}@
zwysJ>SU?MSUeZ50Ul78de;M~LNd7AyPYiyPa#atXVIHqFloSgY>ZdIjuy=7%_Qlcv
zOv8VH%hJKtaH$uaig7PZb=<eQWP&wbEA~`8^cy-}N5XQ)kpNWrQ|?;Fm7&A@qrW~+
z_erIid~=06^7jBPpc2QZ4Nk}^FvRyWMJga!;3nda^x}mNSU0mrM*tXN@CGr((wnnC
zeM$s^gN4V+(eNA(2gmnTDLw$F)RQS~KzN7;BqHjvBMrdC3t;-2yA@3bNDvp<)jTlF
zEAHLdhM<Eb;z3a~Tm<c%3)5Scq%>xG-bAcCez4gEc7AVo&<PMvr=S;VvQvODALW64
zP%umVt_Q9$HBC2S!7g{Tv$Z$_7(jX0+Oc|VCqZ4l2X(3Nr`?OkL@at+asUsSB0A-F
z5ggsWpu-{j)Z{mh`4u8L2SupkOR74q_th-^r%IA91Ho>C3_}e)CFgIt(e*ka^%XR&
ztW(Mf{s|>`;g@F}%y5>F3u}4QTpkRe(%+d)5DaAt9|Od^M>b7z%PM%mTfi(?ADg@O
zKUM`=bZ2h?hjJ)5l%<SDzoX@kJidPcNB?KO|L1&HQ($`dDSSx`^a>+pb*e<E{v^XD
z3)M_(g?mc&{}yfdRvd)`L?9kP&G|odoyka$;qp-M0Bi?Fg2TKB>t4&rkM@IY2K|8`
z-EJzgS($A0<mLbdUJLPVSjA0!L!u-juz3*iUByaB!6KP{V<*&Nd^B{o&wD!v9{a3=
zJoaUn(z3oXr!kV>_%$O2L5d&4A46`Dy&uZXj-G%jsBYZ$>$@Jw^|wU?BZZMjh8j(v
zo=*H#+;Qc&y+oifKV8&<yN?8V5UhVy72fJdC_a*bq1-!|)YCk${bgqrQ0i2-zaSB(
z;yC^8+&Se;y&Q%A+xM9&M$YYhQ`h`_U+sT+pRiQrR1{Qn-o&H5beGNQnB-|Bh=rWg
z*6y?=pHqRaUa3tw>k6ih+>~)mkVazc*GO&(9Y&-pR_mO&^eb)d22Ub5{9}H95EuEc
zpN3QpZ>sx+tk=9EjnIW=h9!bgu73w;r6XM7Q?5#HJGdN5O3%k6M8JZ~*(L&a?``a~
zmm#$oC|16Oqwk$-t(vG`>@-J0%ocv>0TXL6`&#vB{aH5~PBsGAw2gTb$?G`g*9E6V
z-M&7Y`0Jg+AaUX24@f!#M5w=J%oNfhVW()>uG;*Wt!Ktnow(x(8VxvwlOL1Ux&FZH
z{?~r-p}Bm`z4m3WM~1brq0e%-&z$rgl5%p7cQg=XqbIY4d&MAa64=7+!r~{^J@NZG
z(y@Q<?@tE^l<w}E@5#&#E}ku_sH_->R`o={r|F~!K%Y>cX1C_kBA41=^!vC_POO@@
zz!OmzHFy!`-(2mylR!E@WUE{;Tz*H2b*45jUv&fz4MHQ4M3jpsCikOI({Z_t?tb5%
zOTORm8)G8dnDkziG9XJ2nDeFqzo#1n;1A0(lz6C7tl{o5w|UAstU~(%0*R(R>0qEk
zvAQEaS8$J=gQ6Q9Y`tKKg(R-K-~eAR2Iv24D#}m<j9(5ejCEJ<nYfL^93co=W2@Ge
zM)Clyo#njTfj%e>&<tq3;eJ5)DFAdxDt-t##?Q!ppj30@;HvS+uBiOL!(z+UN~wO|
z`^M2p4o)yX)%^cL9aa?GW;pQUX?}(aM6OIWrd=l_&H9$2J<yr+<`mCtsZYmQWI7q}
zLntxrvSU%mDKjR3M4Pm}KAkG(NFQYT9G)RgL|T7*4Jj~4t|}-HZS_Ow_u=`1Abvpo
zyE5eOT=1uq<=Y4%(}W!Pa}WjyId(M^`8yWWERsh{f7B%LO(dSOK>EiE>BDOf^q0nQ
z7Dta!IUv5?_KC#D@OIgLQlz9a5=xHXjS;pV&IVv$2E<t`ckV9!iw%af8(7t6=h|-~
ztr_5h53=wj1#>1~EoT8Ic^Ark7kX_u?zBR_M+)RdWkQMcF%PB<WDO2`YJli6+ZhYk
zAZ%3S)leZx^Yd+L$c@#7)#`vO+VBgCbe=c2z=_shVxO=#^)KG&zXcQ6m76M`pbm@&
zgfpN(l8=O7%-h(rH%>^c{A6dieE;c^<n-mggJ*mD5%IU4v%+_o>*<Do4UM-?j$Y9!
z;DT@F*n&9VC;MJk*WZ~3%`uNC7qs_+e;I0Ko#l6ntNaTR0{q0^WvS)83st>ybiQ1V
z6L%4x>j7VKFL+*A7Udmf2&gu-M~ixmK5P{|tfznwo+<);h1gCgBnTpF1|NrMZ$>ZD
zNj{>y0LvlwHdzagW!}vXYEByi4y>g25<e8(rZ;f#8@Yp+&qt3@LFi)A6ppz<P5v2|
zT%!s!1(WEfl3@=;+*E6cjP*mg@5A03hA=Dzv&MCSR>@=l$u=xv060|$0YC&W29>_s
z0RHAY73Y=-5$@TZoEV?oWqHkvd$*nwQECB7Ho3=_B}lfAu<7;+5`Tgq$y}$ZOt7od
z6G%@y>c{9MFiVmf$MWhvqHJ6z<E}ic(`DvYPYtRVdrMNF7{T+!cR1Hjg8a<Y@B8J!
zBxnHD6y8`0<5Wldt2>b--=R$DC#1|75;p@#VJ{#CR{;Row_$<JPifM##JR`9|FW8B
z4Bx+=Rze(m)}i4-WM4w?)dOkAzcUEah~)fD+6G2H-C~`3L&vMIx}{%iqXxiKF!Z7I
zdFlxmA=cM-b1YyDVw?lL(W=8A55C#UB`9x_`<oW9pdb8reRboi^(SSNu)1>da)%;+
z2E-56?WzG^78H#~=^9!cI3;4Ol*hYhd6}2IthywC|K4QwOs$Wx9ilpsz99H>i2}Y#
z3&@~?BNKhuq(*nIVsrdW_?3rp5<Cci7^)DLv_?sJ7DGhP>Yx(9=gYlOGu>u|o73&d
z9;@y0LUx0qJDy)AMIou6;v((^y;0j6{z{7jENJ6yfIdpxj=9_7C+mpsrlFtFXZj;-
zVEaw2G5$`W>P)6xdtVCeUiV`U$SZ{oJ#7W8E$xe!gxw3vKJ<;;LFye2by3Rf-i11H
z-EdGJg<d537j#-!MLz(<{@%+!=wXhkRLC%ZlJBHZmAs}CV%BcBJ^xVtp^Iscu_k64
z{BQIDRRj+lZM7;wI%=Q4-pTs4N_TXe6Ohz_ef*mRI~fr4>n|>CvH27Jr}N%L4OTJ7
z^F9p1V48-n4Hx)sJ0^IHpl@Axcv^SW$mPRQdmTRa+WhwinVtT+gbmh@k<=BK)nSlv
z5IoPFyJH0))q4=Gw)G6KF8HF2%{N_WHxtOWam5I$XEjHg*5uC_5y^`@x!v2WcUh0i
z;pw|u^7LHf3{X9N&uVXN;hkP%><e8&OW7vgL!*ew0lZ7tFDc7DwN1HQX8o)DRlDyW
znYRL9R7Ei63EX590F515l?%@Nbv~?FTmeZ7Qie~r6GamuSGBi3u+HIBY&Vp7?$+xk
ztT8Mv&sTd)yxlbLzA_hXXeiA9zHu0V>}`;*Z#Tb+>fytMxeM|T3L%bx1Weple!tRE
zf0A2IsFOFpjL*?2)x*70w!ey?YGkWNzy|W|O&Er2z*YQpkBQ<Qj|szU?e$-n8jr(-
z;v+QRf1{mS&Q^Pv9#pcxZ|oOlAWaFGJ;bqrh~+bQ(DnwcNkzV|wQB3p@WC;BcpaUh
zx?hoBWlZ@WZ$E~YYI5}D9b%Lbb|mk_T=<@$ugB4lKzzDYYvLm?Z&0e?08h{^im5}Z
zKV55F%+?pxYxaQPJrdACbk88dOX0RBIkb&nSs-i!VgIDpbiA2R@-OS=tCQQK&ncZD
z@8Su1$+_hWezvx&9CK1TX%F`iy#O+Ye=k4RtWOf#KoB1)^L2tQaM@WXfjk$9b6ha(
zfsPW3Ch)Am9Sf?Xky8UOQa(18C{+U@7xy?@6v<!;(q3eGE;e`^-e`_AEPxZiQT~3S
zlJn$8VcTerjl~=|l^GC)pY8jS%;LNI*mk565u~pVaGr~$@@|8V1Ek0YpaMJgYIBr8
zeSExaiwJwG-yswLK)pqd6AFfo4HrBh(AWhryq)^mm*kzWvyvv8W3m9}?lJy~qYE_3
zsiN=?<@}c)*8Rf_)kbslOX%jpD~y*ud;Ek|?1_&*THd#ZU>YhRzfhU|K8o*1%0{rr
za5@sP)P~|RqkcrIjof(_-vg-;RKa37PG!iY*Dw<z*US;k2r}RB@ef*XON2qbLka}E
zvb?rdTu=M_`9`#G*=7-hn^M%2Qk;0VPXtNN@e}``mfAnxp^<nJBirovl=>qWOQ3P>
z^FFkel_e|KepM{ZsS@420Q=0hJLV9K1cp%<vK`p1@GriF625ARk$jp6<%6r_*6$A<
z3q`&2YYmgFC-n<#YS#QoXy$Q|Ji1A62&|yg8hQ~uH~(%D?1C8(awuP)*)C3re*m|Q
z2<lCgr2EPv&(*ib{R9Xh@MYF&@YCYOZ%J`u`Z@P-nx)qX!>3@!X7sNj7B5(ypF!M}
zGpkXHLY-G6&j;`m8Bi*5+4iHfH(IaQCIc!HV%ZyBy_k5%F5jqx5v~XIr}0S^&+{hM
z|2HJ-+=bGy+Mvb9m&70`gx9#z0MeV6O%S^Szz^yg+buWZc<;+aO1R8tSM0cA$(u9r
z*Ygy+W7_8DJ6`0tWNRZ)^uxrpe^Z)TD}qM~Ek7tk@K~z8@|piqKQ8g$nF3_%2m%<!
zQ_sb*%miSb`dhfEl3GWIdAf9KYG<e=%dJ~D)zk*vi@9spb19TjL<O=SoXCytcMS=V
zugl)(Ov8Lq|H7d<cjtsD<hdj(cGVu2`b>6*1Z5O;m|cjRJACBqzep$kIMa?qgnQ{g
za!oDUUS@BGSiIzv;Jp8cE@0syXzu~A69tHIw!b}lV|-}~5=*!sYRU3L*g9f}E+Y9J
z8>>v0QOV#s3$s*dy%r>Z8}lCraiD4cSJ70Lf}5h8RgLBeB$mwIwXRu0p!Fj@JI%xX
z7y^re<TWtq!bR+jJZtmJ6TExu(y*i487QxVy$uY(Bmmg8kaR3KCTs2TtTR#O@oJ$g
zQe6e+sx^*XnG5km08>%--Tji18<=zK*lChGH?L`Y*DHv<A*R_8?KAXdrgxFk_$RMv
z#b2xzB6V8fUM8iku;KCus)fx$k<<!xmr)Cd)K|Di>IT?^4A_+^Ln=dM1!FO`v>z;h
zsK}^H+mPy?D^MKNWyE`<!7B!9fP<8k`jO80RNEf080E^Q6AzXuHsZ5VP0I4}7j3_s
z(X{Jb6gCypyuPNo&VIhEBrL;DY%W|~x&FnW>?M8z{-XKP_vpMEW_v4;<E05jH$8(_
zq5=Sn0S3xCgtE=p`JH46l3nj@C^dMC*8Ad5N?_Cj?0XX3do!y2#`ll^aSzqao5#^V
zT|5#9|D8l~o6SMKDEzgdR?rgn&Yv8R;fRayNVJOvz@h8~OqwMBeYqe?R_WOJlA=)N
z;O=l;-y+wF+!W^?W2XI+h}@r5wm*92HiC7ErILYcz+SRG2?MC%`<j+}c55m@^{U{(
z>V))~UD>S*Zko>C2{W>PKu}W*2OWZ@;ZFG<6d3$>bM%CG^^2SK`AW9x+#V#lAL(OI
zOJkn39*6GKzp^a|I;l+n=(@9Ts+vP)CSwLBFH7Xc7@I||_2lRe_||!t8kdv4a(%zn
zxSt~>b(E*b#)<g#PT;GZyd4=L^yOV@E4ZCi;w9btA2xIRXe#-d;4@1I>I2P2;lVN&
z&aClIG;nCZiu@OCk*MK)1WqwxpfdY)xsDOpcAObFxZ-31&_ElC`l<s~WbBq~LoiPP
zC_jFZ|5Sdm50HaJCQnLm-2oH9Yx-;{DnQq_Kk)`gtU<7K!GQiD4d#{Y)iTyR!!j)>
zdfO$nG5G7JbVvG6Qcwx%U>2)E!YKItAwfjkHkltfTg;;3!7vvdr$OHTx3f-gs9G`z
zdGD}M5n})!sP?Na_(LH{?8aZ5&l?$gs5$QX6NFeX6!?CV(@NNszG7Q4lnP`Rv9#}x
z3&M;qDT1-^-`d0h05n&Z0@w+!lVeiVCk3ue>Ofg`_MO|w=Q!RDx<zbNI{vmA`_A#j
z2>VJiQTb>W-!z8H;vPfWvzr9?Chkoiub_8VFs<Uh%5g2~kUBA*W|AP5FKIt4iQFn$
zr8Z)Xe6~!3ytz;Sexv0#lEe=0xKudi)^se_cD&nbI4QR1|Ey;=D9*aJlc$IK-|H)A
z`J*g@*?0TrOTP62rpXyB(fWW1Qd+L`+wab~E*<Re*xNz@@BJxSZmsX3mp%Po+I_xz
z9}@1?`+tWpOU(YDH`aJ{^1}P_t4wp@WnP?`=<O<breXFy+?&;J$2-<FANvlwWrcAs
z3KfOkC`*x%4wv@1-d`p4f|+c*FpK4V&)%gRW9Ri-7cXs~`h^<Ar*;)DIOSSm^bdBH
z3}|i}?<&3y4rghlF-Z4ueO0(lWcgr<jUjflt3JnCmFe9Hk)WO0@(U^QQ@RPeT<pr;
z8dlxL3|-Y8bsHIFzN1_)*2((rzmfsqOY;-6=&$yEC`qqPhwdExjqG>>Nabn-Mhr-G
zXY?aXHH=Zc3Zl8usMY}OY)Q}EwtAOvv2w@G8??8$<L^Kg_ute~%in2Uy_|H&)?VBW
z)@bT~xcG}Bg-0xfO#cqO0WJMy<7_JWmpi%%PeQizqc>8+q@QaPS88n7%?y}y1W>;0
zA1KIUb-o+DgBIM2D)8-j{9a?F=4){gGoRsO<H1W#z4Hfgo`$M3{Vb2GmLep+qIL(b
za;Ni$)wwAdR-q4==p%@9XcUz!e1Fe<t+uJz+bucYx&J^j#i%FmE(HuYslMp%fRExY
z=wi1qor3xPmd-yY5&}5%<d9wmz%5B`!2s)ud=gCQ8wGyN?YynEoZ!BJ;IK-DOWs|)
zd^~x(3Jl&j->HB4xS0npRNN2#7(e&bDOO{Pp}{MDAgHg>erSN;k6q8tS$p-*c|<=9
zZRH-(D+iLz(iep=R_xEe`1+JZ6p-Kvyjn3Vp=Eqj5zVdjx?|pBWsl9H+sDScf9;Ev
zq73>-hZf|%2k%B7u-&cOwl_#+#=TT13H_3)q|e@EMz9mM7G;fNB{JKPa`QcIvQ;55
zE*IvcXUpQ5_J6f^oexc(-CL20V8Ma37C~&OpaMccKtRMvWh<M2Kv04ZhD>FPOl^T$
zHetL927(Y~n6go1i3|fF>}d>BM%ekC$I$lIzu^7y@*$t{jNJFR@B5tVI@iTg3x{)0
zOqn<_H_hppAL+5H9i-D4h7n-8gTNf~h-=%1tcUdhI8ocqS@76!`Fv>-MOFJ%ddwB;
zrnw(Eb@LL`jOB~(m6qWEZZ^;Lp???i`xENHv!q|G(3GIgDr&lljfN8{m0KRYCWLKR
z3f#y6FU7?ngG99)4pa2pRa&xXl%LB+2YH_xhwZX_=i8w%yk>5}_FA|I`5pZtZKGtY
zu2f+!N|-YA`o|e#;Q_g3RBT=i|IDo4y4%EOiS!M;=?sBa{yH^iLZeTO!^1e$ybzB$
zP+=%NQW8p%9}F2vUp%XRQT##4tlkTJ(ez?z_4ns3)<*=z`caoxo=axm8olEjP<dq!
z-H()g!Y`#r)RA=UMJQ*B=hHV^ry@zyK40pcx6~iLb~anL^)ai_nW^=f=x%lf%et?X
z9%Ss^)o+Q@=o8`(X8L5eH<~z=bn`4P4?wG^n&UhlO&i)FWjZ6rJx?Ez`r4zWCM_O%
zr?S9S9foLgO$;}xifVJS;*E73SE(E^WftMjm|k9W!Zy3;HvVT1IeIcRz314QWVEWC
zBuUjwr~vOG$`{+#O;5+8<w<@k)q~7$ES1EEFu{)4Bb!-gbs3L!d9jL~ykd88hlVHH
zQ<cPR+D<N)kngqUlMne@-R3o>CrOFpRG0;+ci;QA@vEg`Na*)iH3lVULYKtcC^PZi
zN|a4Znt#~TH;heP77|`ht<)91?!*CULPG;$D*`9eD$umk)J~;}u_lLk1L%L<`qCm9
z0bU~HB`5k}?fUoezocm=oqzs5S&0s(aim3sK*VzM6jw{&0X3Bfl3aNKnH`sBFSo&^
zd3mrxVWF+$uwiSE25*>6wcR(Z8KZuk(+Y%OQd7kb$-aJTG~t$9ArjvAK$vMplD@GV
zy29`n)~k?I+$R8gQE9+ZH0g8#CHfWG2s_@xwccWb!r7WIlzPtD%bf9zlbz_(lw`WQ
z^ebDC@L@RadN$#ftJ&=@bBoA=xcc9VnX(pM)I^b#ZgibXM>9F(4*AsTL~OF{+?1r6
z%=Yc84n*%IQ@YB<BR96nt&x(HDfa;ZPGOPgLgT0QlfzSRP>X$|=>9p~dF(9>a&Md9
zyIs%k=NQQ`)a~!C@r(5rPYY{(`dK0muPa)NQ8<~Gi|R@<dcWqBRcumi*1?t9Zrl~6
z^@kj~-`<DsXh*WO@}e1f%@FIbFu_PsF0f~FZvUtoqN(eLb-EyP`$8dU#K;9r(R?NM
zBh_9PLl)*xO>FC%x&+$xRR)3VILy@6l0W`-!VImDR~uJ0w4WS3pF|F0?8535)yxiJ
zc=HFCH?HC`t}RTv8s{b{$`f}g07=Lbs`V57ej1k9y%=u2Se6NTuvbn)RZnIg3lf)`
z17OPP*I(Q8d)?(dK`)AvlZ^3VR;U@;)qhDgJWA})O<aCj<R0wS=<S?VJU8GNe1p!w
zFdSq?Q56pTQ?EeTE$d6ob~t_~Cscb?!n$yB4TIg9v&F{d*!GHu4_Ss=Lg#^K&T@?o
zDy<k3RWM#xKx4MhN9E?E(<U*((uHNlb+)~@?6$h9Pu5yOC33VZkK`rd^6NCR<3OBg
zTJM}`?N;k@3+ID+ImUtP!a+>%WQ~X&#;ct21JzzXHZgFynQ&{2pEY}%!hojh<j_vD
zRq=wfvuk~$d3QG=to6u}(+d~H(OQ{~cde@m?eRasek#G?8b9pIv|&0tBzXs(+w^K;
z|L`_aO&*2y?|K<YbuPD5u*J(>r%pfGdPJfa8eqFWviI8RFc}RsleaPBy+K;MNo|P}
zqS?#cC+&=d$-m_|MVks!FnjX%lHZ~*yanU%q$}>GSbfT7DBB`u*0%{c9d6jb{2`SH
z>af`T`0%RM5DX>`%iFAV{y@-5kcf?+4P~=cQ>c2HSo&craJhZ**oU#~A<3fcvCDa|
zTm$*W5VRcP6agDoEZS(l0PZMu5GHbDmu&4Xt`QA$M~pMDR|*pnU%x?RB+W~8MZ11i
zP**@2`Ir!zuwwssBO!EcTpw>wUPseYPAjNeScUI1-L0HoKRxfDTCSzkS`x|YXlO|3
z`=%c(8p0w?gquV@aj2GQNy*Phbv5xu_++FjV~VIgX?^UQ-2&N0+Tr{x(IF^>0EcCz
z@?g?%pJ>FxkwtM<B;*P7_KFF#bN1H<S;%-{$s5E~=`|(X9Dh&gj|+>f+8=+*IW*o7
z5d^VKK7Zd_0((_z(c!aG$j{8pAe;D`gH-+;X8*bsZY{*!E+Re&XV8w4*_uBgM)*rd
zzKiV%lv<t=09eJbjhN+58(u2J_hs+CDAN#9sN$)C(kshpzm-7fs*N}<`jJNl<)|9d
z#xp`tmYcL7*r=SsD{5s2_x%m%BTxczUWW1R>tVz!xTWr*#|#+Bpb*LoptN)BM@=F^
z%5_rL0QC3-iLml32B=)R0b_r)d(YleHt+paHu+z4tS*cp=nx%>IdE~ohCLW_k>omW
zTUqeEZ*spVSxl)+v1rFNWC~tCS#N)PNjkzF%$7|pfwbhzgV|C-<m5&PRD#J>+v`|a
zr$Mt+e@BO*V<GiEri=)sS%7*%qa|>p-wtuGak3{Bn<tE3GpsSs?e9S4UWi|Aeik!j
z%i7*cHDG%udX<%vV8vJYn`Rs5uT}xW@EMx*yTDAteSY||b$g2PxZmN}t0GHF+<*qs
zzKBp2$YP=z4V<i-oUjDy$CLXfkw0L|=Kcg={azk{Ob}?i(`+$^ECSRN+)9_`s3kf^
z7QQ6qoN@pWJiIx$`O^{5$Gp;uU;1vC?>3*2*>g~!AgXgk8F41mw_XSo<Xyc?HeGQ=
zTCG8vMh8nCJE)b{!DGOpWyBY7GdPyjmjC!y6=n=nf1XuRs$D>O-51~n3|^}9O*BO{
z^5jouLP^q^ssiecGpM3W!6Xp37|rzNUj@NNQD83T+cKAlP5^u|0ZcVPs&K(f#GwIY
zY+mA+%zy5l<&X`OXJ>TN!}#Uw;a(oJC!#4IKsX}n_UR^M%0}RDpq91K7(9Cj3RrJD
zzS-CUbA|pF{W}`y9jBSbW$}ZtV)~f}t7);E+0gNHr|Hj=SwNK_%r~@a8-PrtsOJ}4
zeNgW)r`NYsww9vKmW}@j{XM`|84%4V7c+n$S_d+x2;d~>Ioe_jy#mPxq5-Nru4qx^
z1HyfrVA5PU{cQLTGH<IuU6Z2pfQXbojb$DGndizghlUQvienJNEZ0;dhL@nO>nLOl
zMD0Pte=?h000qN9)>IoV13)0EUVfYYJ=b_hrE}I2epr)5doZ^_;nFB%03?%=xK4uf
z8=k6wF+xU4!LPWsqM4d=(>3jbej5&;_c*=X57R(poU0wkj;oqX#7=iiiklRy63_b4
zrJ#Z}T<F?p0p#v0ri`qogHV4f9F=&_t#*O{M+Q4E_k&d>oYf+yKsnWv;xYj6p?49*
zo^7R7rakGq*y`2M{RyOZ0b{g`)Zz?1(QjKY=e(c<25c?C5L`7z-;8-w%U0Y6GmDIs
z#CwYj&<j(auNAgm+)s!!oA!Y?0^2~_k5?cwjuB@dTr+GNI5;Mq$RD3oD)#KPU<~<J
zS1yo2Pdt6P2SC@(Mj`*~V~(Y@vl^D^=SSXsNkA*)KeI0MjH7I}Xx7WsdP4&m(ghFl
z4eQ)cIeMJdL3!c6$tc4B!f*t8`Ykksh>P*j+n&5+v47anfJ|6TX{vM89AH0-BTx{c
z1H_>-?u|y~o-fTkC-??Dr;=ADZ!`K`BbC>&b+fQiazl@M-2hjcfI0E%rB02{9ciJ0
z<?uLcEMPeVc+q~IR*b$E*kMSgl0ey#!F)OvbsPW$(O~TtI4H&+Y|)finQkrg?!tkx
zslPvdeq*|@y|3$qma*2uZ;wjV@DMRQSl*zy(lH*T-o*OphYolnoNX^gM#{C4a}cBD
zs@s*<7%Aj5_qzl6noV1x0fNE<SvP}idsk4lG7>6O+;f?Or;rjyS^BNM0$`dIM+d!5
z85na!?ftbHxpV@!;mO<gvmcohzIL6`Hpn)<MeE8a>5;}g8`6ELej32QA4{n0G{hVU
zpobNiR&9#F;CHavO6cCiL4XIUNf|qSE9sEECZQCF;3wAu03^lz<bKmYlUxBgbw|XZ
z46->Vi1@10vVzxv)A%|J0BFsvcX_jpQ(Z_LSO?i7Iy-EA<$nf~Y5`ml6P1TsA`$XE
zM2JOnf;Is55XKuk@Q{BV&yov8VnN&>i;xFegu20-P<bXtA7cUh!ys-ckZjfzJv3i}
zTz}jD|MMIH-7pn;VvX*AKk!3<R^at*JK_2aPBHnN09}N~x5vfv{Pd{ik6-1nwf6`$
z+oP7Nn!t8k6gxk_`qqixfku$BauI{BFFZUZ*(B~p(Nj1EZcgsFWj#c?n%l}d;Tx+)
zaP77q-{1UMyut9Cd;m~AUza6F=oB8x5MipmM+Wrz8}&fEqj;WfyS6wSmo6E#17#;!
zakTFt3rFabXFqrl8JolZ9viTV72vr(3;kvhd@y-;G@>TC?^L=Z@-8Aq1~9r8kRAv(
zPnrPY><M5Xz5QPsL>9b>{$(QIZjG>+0af<oM!j+Fjw>xPwKjmcr$|T$!b~B_ydd1;
zAAV_H99(YpXFr%<2#5k_&=~Wx6}MHTOGBpT4o8y${XyQvMQ-2j_TLZpA6m9S-a%)M
z1xW4#5K}?mX+OZ2e_j6qx&B(2=TX-0PXWf&yCB<*Yd5><g=&*KUJkqK8rg=96JEBO
zbgmgc`pdcF(kYmon@goT6ea7oLu5t4)at~>X#dUXvku#TV(M!E{`GMIdiGZCn$Qrl
lxvw+D`uN|?|0~17<k&LiTKE{gMcxH}YS*=|5wBQ2{y)l&6vF@j

literal 0
HcmV?d00001

diff --git a/docs/design/fused_moe_modular_kernel.md b/docs/design/fused_moe_modular_kernel.md
new file mode 100644
index 0000000000000..0943454d64292
--- /dev/null
+++ b/docs/design/fused_moe_modular_kernel.md
@@ -0,0 +1,236 @@
+# Fused MoE Modular Kernel
+
+## Introduction
+FusedMoEModularKernel is implemented [here](gh-file:/vllm/model_executor/layers/fused_moe/modular_kernel.py)
+
+Based on the format of the input activations, FusedMoE implementations are broadly classified into 2 types.
+
+* Contiguous / Standard / Non-Batched, and
+* Batched
+
+!!! note
+    The terms Contiguous, Standard, and Non-Batched are used interchangeably throughout the document.
+
+The input activation format completely depends on the All2All Dispatch being used.
+
+* In the Contiguous variant, the All2All Dispatch returns the activations as a contiguous tensor of shape (M, K) along with TopK Ids and TopK weights of shape (M, num_topk). Look at `DeepEPHTPrepareAndFinalize` for an example.
+* In the Batched variant, the All2All Dispatch returns the activations as a tensor of shape (num_experts, max_tokens, K). Here, the activations/tokens that subscribe to the same expert are batched together. Note that not all entries of the tensor are valid. The activations tensor is typically accompanied by an `expert_num_tokens` tensor of size `num_experts`, where `expert_num_tokens[i]` indicates the number of valid tokens that subscribe to the ith expert. Look at `PplxPrepareAndFinalize` or `DeepEPLLPrepareAndFinalize` for an example.
+
+The FusedMoE operation is generally made of multiple operations, in both the Contiguous and Batched variants, as described in the diagrams below
+
+![](../assets/design/fused_moe_modular_kernel/fused_moe_non_batched.png "FusedMoE Non-Batched")
+
+![](../assets/design/fused_moe_modular_kernel/fused_moe_batched.png "FusedMoE Batched")
+
+!!! note
+    The main difference, in terms of operations, between the Batched and Non-Batched cases is the Permute / Unpermute operations. All other operations remain.
+
+## Motivation
+
+As can be seen from the diagrams, there are a lot of operations and there can be a variety of implementations for each operation. The set of ways the operations can be put together to make a valid FusedMoE implementation quickly becomes intractable. The Modular Kernel framework addresses this issue by grouping the operations into logical components. This broad categorization makes the combinations manageable and prevents code-duplication. This also decouples the All2All Dispatch & Combine implementations from the FusedMoE implementations and allows for their independent development and testing. Furthermore, the Modular Kernel framework introduces Abstract classes for the different components thus providing a well-defined skeleton for future implementations.
+
+The rest of the document will focus on the Contiguous / Non-Batched case. Extrapolating to the Batched case should be straight-forward.
+
+## ModularKernel Components:
+FusedMoEModularKernel splits the FusedMoE operation into 3 parts,
+
+1. TopKWeightAndReduce
+2. FusedMoEPrepareAndFinalize
+3. FusedMoEPermuteExpertsUnpermute
+
+### TopKWeightAndReduce
+The TopK Weight Application and Reduction components happen right after the Unpermute operation and before the All2All Combine. Note that the `FusedMoEPermuteExpertsUnpermute` is responsible for the Unpermute and `FusedMoEPrepareAndFinalize` is responsible for the All2All Combine. There is value in doing the TopK Weight Application and Reduction in the `FusedMoEPermuteExpertsUnpermute`. But some implementations choose to do it in `FusedMoEPrepareAndFinalize`. In order to enable this flexibility, we have a TopKWeightAndReduce abstract class.
+
+Please find the implementations of TopKWeightAndReduce [here](gh-file:vllm/model_executor/layers/fused_moe/topk_weight_and_reduce.py).
+
+`FusedMoEPrepareAndFinalize::finalize()` method accepts a `TopKWeightAndReduce` argument that is invoked inside the method.
+The `FusedMoEModularKernel` acts as a bridge between the `FusedMoEPermuteExpertsUnpermute` and `FusedMoEPrepareAndFinalize` implementations to determine where the TopK Weight Application and Reduction happens.
+
+* `FusedMoEPermuteExpertsUnpermute::finalize_weight_and_reduce_impl` method returns `TopKWeightAndReduceNoOp` if the `FusedMoEPermuteExpertsUnpermute` implementation does the weight application and reduction itself.
+* `FusedMoEPermuteExpertsUnpermute::finalize_weight_and_reduce_impl` method returns `TopKWeightAndReduceContiguous` / `TopKWeightAndReduceNaiveBatched` / `TopKWeightAndReduceDelegate` if the `FusedMoEPermuteExpertsUnpermute` implementation needs the `FusedMoEPrepareAndFinalize::finalize()` to do the weight application and reduction.
+
+### FusedMoEPrepareAndFinalize
+The `FusedMoEPrepareAndFinalize` abstract class exposes `prepare` and `finalize` functions.
+The `prepare` function is responsible for input activation Quantization and All2All Dispatch. The `finalize` function is responsible for invoking the All2All Combine. Additionally the `finalize` function may or may not do the TopK weight application and reduction (Please refer to the TopKWeightAndReduce section)
+
+![](../assets/design/fused_moe_modular_kernel/prepare_and_finalize_blocks.png "FusedMoEPrepareAndFinalize Blocks")
+
+### FusedMoEPermuteExpertsUnpermute
+The `FusedMoEPermuteExpertsUnpermute` class is where the crux of the MoE operations happens. The `FusedMoEPermuteExpertsUnpermute` abstract class exposes a few important functions,
+
+* apply()
+* workspace_shapes()
+* finalize_weight_and_reduce_impl()
+
+#### apply()
+The `apply` method is where the implementations perform
+
+* Permute
+* Matmul with weight W1
+* Act + Mul
+* Quantization
+* Matmul with weight W2
+* Unpermute
+* Maybe TopK Weight Application + Reduction
+
+#### workspace_shapes()
+The core FusedMoE implementation performs a series of operations. It would be inefficient to create output memory for each of these operations separately. To that effect, implementations are required to declare 2 workspace shapes, the workspace datatype and the FusedMoE output shape as outputs of the workspace_shapes() method. This information is used to allocate the workspace tensors and the output tensor in `FusedMoEModularKernel::forward()` and passed on to the `FusedMoEPermuteExpertsUnpermute::apply()` method. The workspaces could then be used as intermediate buffers in the FusedMoE implementation.
+
+#### finalize_weight_and_reduce_impl()
+It is sometimes efficient to perform TopK weight application and Reduction inside the `FusedMoEPermuteExpertsUnpermute::apply()`. Find an example [here](https://github.com/vllm-project/vllm/pull/20228). We have a `TopKWeightAndReduce` abstract class to facilitate such implementations. Please refer to the TopKWeightAndReduce section.
+`FusedMoEPermuteExpertsUnpermute::finalize_weight_and_reduce_impl()` returns the `TopKWeightAndReduce` object that the implementation wants the `FusedMoEPrepareAndFinalize::finalize()` to use.
+
+![](../assets/design/fused_moe_modular_kernel/fused_experts_blocks.png "FusedMoEPermuteExpertsUnpermute Blocks")
+
+### FusedMoEModularKernel
+`FusedMoEModularKernel` is composed of the `FusedMoEPrepareAndFinalize` and `FusedMoEPermuteExpertsUnpermute` objects.
+`FusedMoEModularKernel` pseudocode/sketch,
+
+```
+FusedMoEModularKernel::__init__(self,
+            prepare_finalize: FusedMoEPrepareAndFinalize,
+            fused_experts: FusedMoEPermuteExpertsUnpermute):
+
+    self.prepare_finalize = prepare_finalize
+    self.fused_experts = fused_experts
+
+FusedMoEModularKernel::forward(self, DP_A):
+
+    Aq, A_scale, _, _, _ = self.prepare_finalize.prepare(DP_A, ...)
+
+    workspace13_shape, workspace2_shape, _, _ = self.fused_experts.workspace_shapes(...)
+
+    # allocate workspaces
+    workspace_13 = torch.empty(workspace13_shape, ...)
+    workspace_2 = torch.empty(workspace2_shape, ...)
+
+    # execute fused_experts
+    fe_out = self.fused_experts.apply(Aq, A_scale, workspace_13, workspace_2, ...)
+
+    # war_impl is an object of type TopKWeightAndReduceNoOp if the fused_experts implementations performs the TopK Weight Application and Reduction.
+    war_impl = self.fused_experts.finalize_weight_and_reduce_impl()
+
+    output = self.prepare_finalize.finalize(fe_out, war_impl,...)
+                            
+    return output
+```
+
+## How-To
+
+### How To Add a FusedMoEPrepareAndFinalize Type
+Typically a FusedMoEPrepareAndFinalize type is backed by an All2All Dispatch & Combine implementation / kernel. For example,
+
+* PplxPrepareAndFinalize type is backed by Pplx All2All kernels,
+* DeepEPHTPrepareAndFinalize type is backed by DeepEP High-Throughput All2All kernels, and
+* DeepEPLLPrepareAndFinalize type is backed by DeepEP Low-Latency All2All kernels.
+
+#### Step 1: Add an All2All manager
+The purpose of the All2All Manager is to setup the All2All kernel implementations. The `FusedMoEPrepareAndFinalize` implementations typically fetch a kernel-implementation "handle" from the All2All Manager to invoke the Dispatch and Combine functions. Please look at the All2All Manager implementations [here](gh-file:vllm/distributed/device_communicators/all2all.py).
+
+#### Step 2: Add a FusedMoEPrepareAndFinalize Type
+This section describes the significance of the various functions exposed by the `FusedMoEPrepareAndFinalize` abstract class.
+
+`FusedMoEPrepareAndFinalize::prepare()`: The prepare method implements the Quantization and All2All Dispatch. Typically the Dispatch function from the relevant All2All Manager is invoked.
+
+`FusedMoEPrepareAndFinalize::finalize()`: Maybe perform TopK Weight Application and Reduction and All2All Combine. Typically the Combine function from the relevant All2AllManager is invoked.
+
+`FusedMoEPrepareAndFinalize::activation_format()`: Return `FusedMoEActivationFormat.BatchedExperts` if the output of the prepare method (i.e. the All2All dispatch) is Batched. Return `FusedMoEActivationFormat.Standard` otherwise.
+
+`FusedMoEPrepareAndFinalize::topk_indices_dtype()`: Data type of the TopK ids. Some All2All kernels have strict requirements pertaining to the data type of the TopK ids. This requirement is passed on to the `FusedMoE::select_experts` function so it could be respected. If there are no strict requirements return None.
+
+`FusedMoEPrepareAndFinalize::max_num_tokens_per_rank()`: This is the maximum number of tokens that would be submitted to the All2All Dispatch at once.
+
+`FusedMoEPrepareAndFinalize::num_dispatchers()`: Total number of dispatching units. This value determines the size of the Dispatch output. The Dispatch output is of shape (num_local_experts, max_num_tokens, K). Here max_num_tokens = num_dispatchers() * max_num_tokens_per_rank().
+
+We suggest picking an already existing `FusedMoEPrepareAndFinalize` implementation that matches your All2All implementation closely and using it as a reference.
+
+### How To Add a FusedMoEPermuteExpertsUnpermute Type
+FusedMoEPermuteExpertsUnpermute performs the core of the FusedMoE operations. The various functions exposed by the abstract class and their significance is as follows,
+
+`FusedMoEPermuteExpertsUnpermute::activation_formats()`: Return the supported Input and Output activation formats. i.e. Contiguous / Batched format.
+
+`FusedMoEPermuteExpertsUnpermute::supports_chunking()`: Return True if the implementation supports chunking. Typically
+implementations that input `FusedMoEActivationFormat.Standard` support chunking and `FusedMoEActivationFormat.BatchedExperts` do not.
+
+`FusedMoEPermuteExpertsUnpermute::supports_expert_map()`: Return True if the implementation supports expert map.
+
+`FusedMoEPermuteExpertsUnpermute::workspace_shapes()` /
+`FusedMoEPermuteExpertsUnpermute::finalize_weight_and_reduce_impl` /
+`FusedMoEPermuteExpertsUnpermute::apply`: Refer to `FusedMoEPermuteExpertsUnpermute` section above.
+
+### FusedMoEModularKernel Initialization
+`FusedMoEMethodBase` class has 2 methods that are collectively responsible for creating the `FusedMoEModularKernel` object. They are,
+
+* select_gemm_impl, and
+* init_prepare_finalize
+
+#### select_gemm_impl
+The `select_gemm_impl` method is undefined in the base class. It is the responsibility of the derived class to implement a method that constructs a valid/appropriate `FusedMoEPermuteExpertsUnpermute` object.
+Please refer to the implementations in,
+
+* `UnquantizedFusedMoEMethod`
+* `CompressedTensorsW8A8Fp8MoEMethod`
+* `CompressedTensorsW8A8Fp8MoECutlassMethod`
+* `Fp8MoEMethod`
+* `ModelOptNvFp4FusedMoE`
+derived classes.
+
+#### init_prepare_finalize
+Based on the input and env settings, the `init_prepare_finalize` method creates the appropriate `FusedMoEPrepareAndFinalize` object. The method then queries `select_gemm_impl` for the appropriate `FusedMoEPermuteExpertsUnpermute` object and builds the `FusedMoEModularKernel` object.
+
+Please take a look at [init_prepare_finalize](https://github.com/vllm-project/vllm/blob/1cbf951ba272c230823b947631065b826409fa62/vllm/model_executor/layers/fused_moe/layer.py#L188).
+**Important**: The `FusedMoEMethodBase` derived classes use the `FusedMoEMethodBase::fused_experts` object in their `apply` methods. When settings permit the construction of a valid `FusedMoEModularKernel` object, we override `FusedMoEMethodBase::fused_experts` with it. This essentially makes the derived classes agnostic to what FusedMoE implementation is used.
+
+### How To Unit Test
+We have `FusedMoEModularKernel` unit tests at [test_modular_kernel_combinations.py](gh-file:tests/kernels/moe/test_modular_kernel_combinations.py).
+
+The unit test iterates through all combinations of `FusedMoEPrepareAndFinalize` and `FusedMoEPermuteExpertsUnpermute` types and if they are
+compatible, runs some correctness tests.
+If you are adding some `FusedMoEPrepareAndFinalize` / `FusedMoEPermuteExpertsUnpermute` implementations,
+
+1. Add the implementation type to `MK_ALL_PREPARE_FINALIZE_TYPES` and `MK_FUSED_EXPERT_TYPES` in [mk_objects.py](gh-file:tests/kernels/moe/modular_kernel_tools/mk_objects.py) respectively.
+2. Update `Config::is_batched_prepare_finalize()`, `Config::is_batched_fused_experts()`, `Config::is_standard_fused_experts()`,
+`Config::is_fe_16bit_supported()`,  `Config::is_fe_fp8_supported()`, `Config::is_fe_block_fp8_supported()`,
+`Config::is_fe_supports_chunking()` methods in [/tests/kernels/moe/modular_kernel_tools/common.py](gh-file:tests/kernels/moe/modular_kernel_tools/common.py)
+
+Doing this will add the new implementation to the test suite.
+
+### How To Check `FusedMoEPrepareAndFinalize` & `FusedMoEPermuteExpertsUnpermute` Compatibility
+The unit test file [test_modular_kernel_combinations.py](gh-file:tests/kernels/moe/test_modular_kernel_combinations.py) can also be executed as a standalone script.
+Example: `python3 -m tests.kernels.moe.test_modular_kernel_combinations --pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts`
+As a side-effect, this script can be used to test `FusedMoEPrepareAndFinalize` & `FusedMoEPermuteExpertsUnpermute` compatibility. When invoked
+with incompatible types, the script will error.
+
+### How To Profile
+Please take a look at [profile_modular_kernel.py](gh-file:tests/kernels/moe/modular_kernel_tools/profile_modular_kernel.py)
+The script can be used to generate Torch traces for a single `FusedMoEModularKernel::forward()` call for any compatible
+`FusedMoEPrepareAndFinalize` and `FusedMoEPermuteExpertsUnpermute` types.
+Example: `python3 -m tests.kernels.moe.modular_kernel_tools.profile_modular_kernel --pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts`
+
+## FusedMoEPrepareAndFinalize Implementations
+The following table lists the `FusedMoEPrepareAndFinalize` implementations at the time of writing,
+
+| Implementation | Type | Comments |
+| :--- | :--- | :--- |
+| DeepEPHTPrepareAndFinalize | Contiguous / Non-Batched | Uses the DeepEP High-Throughput all2all kernels. |
+| DeepEPLLPrepareAndFinalize | Batched | Uses the DeepEP Low-Latency all2all kernels. |
+| PplxPrepareAndFinalize | Batched | Uses the Perplexity all2all kernels. |
+| FlashInferCutlassMoEPrepareAndFinalize | Contiguous | |
+| MoEPrepareAndFinalizeNoEP | Contiguous | This implementation is used when there is no EP. i.e. no all2all kernels are invoked. |
+| BatchedPrepareAndFinalize | Batched | A reference prepare/finalize class that reorganizes the tokens into expert batched format, i.e. E x max_num_tokens x K. (Doesn’t use any all2all kernels. This is primarily used in unit testing) |
+
+## FusedMoEPermuteExpertsUnpermute
+The following table lists the `FusedMoEPermuteExpertsUnpermute` implementations at the time of writing,
+
+| Implementation | Type | Comment |
+| :--- | :--- | :--- |
+| BatchedDeepGemmExperts | Batched | Uses the DeepGemm’s Masked Grouped Gemm kernels for the fused_moe operation. |
+| BatchedTritonExperts | Batched | Uses a Triton Kernel for the Batched matmuls. |
+| BatchedTritonOrDeepGemmExperts | Batched | Chooses either the `BatchedDeepGemmExperts` or `BatchedTritonExperts` based on environment settings. |
+| DeepGemmExperts | Contiguous / Non-Batched | Uses DeepGemm’s Grouped Gemm kernels for fused_moe operation. |
+| TritonExperts | Contiguous / Non-Batched | Uses a Triton Kernel for fused_moe matmuls. |
+| TritonOrDeepGemmExperts | Contiguous / Non-Batched | Chooses either the `DeepGemmExperts` or `TritonExperts` based on fused_moe inputs. |
+| CutlassExpertsFP8 | Supports both Batched and Contiguous formats | Uses Cutlass Grouped Gemm implementations for the fp8 matmuls. |
+| CutlassExpertsFP4 | Supports both Batched and Contiguous formats | Uses Cutlass Grouped Gemm implementations for the fp4 matmuls. |
+| FlashInferExperts | Contiguous | Uses fused_moe operation from FlashInfer |
+| NaiveBatchedExperts | Batched | Reference Batched Experts implementation. Primarily used in unit tests. |

From 7b49cb1c6b1bf154c134a962888f2a6e12f9fc18 Mon Sep 17 00:00:00 2001
From: David Xia <david@davidxia.com>
Date: Tue, 29 Jul 2025 13:32:46 -0400
Subject: [PATCH 022/224] [Doc] update Contributing page's testing section
 (#18272)

Signed-off-by: David Xia <david@davidxia.com>
---
 docs/contributing/README.md | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/docs/contributing/README.md b/docs/contributing/README.md
index e3ae5055b9988..5a2a70d57e85f 100644
--- a/docs/contributing/README.md
+++ b/docs/contributing/README.md
@@ -26,6 +26,8 @@ See <gh-file:LICENSE>.
 
 ## Developing
 
+--8<-- "docs/getting_started/installation/python_env_setup.inc.md"
+
 Depending on the kind of development you'd like to do (e.g. Python, CUDA), you can choose to build vLLM with or without compilation.
 Check out the [building from source][build-from-source] documentation for details.
 
@@ -42,7 +44,7 @@ For an optimized workflow when iterating on C++/CUDA kernels, see the [Increment
 Install MkDocs along with the [plugins](https://github.com/vllm-project/vllm/blob/main/mkdocs.yaml) used in the vLLM documentation, as well as required dependencies:
 
 ```bash
-pip install -r requirements/docs.txt
+uv pip install -r requirements/docs.txt
 ```
 
 !!! note
@@ -98,13 +100,14 @@ For additional features and advanced configurations, refer to the official [MkDo
 ??? console "Commands"
 
     ```bash
-    pip install -r requirements/common.txt -r requirements/dev.txt
+    # These commands are only for Nvidia CUDA platforms.
+    uv pip install -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
 
     # Linting, formatting and static type checking
-    pre-commit install --hook-type pre-commit --hook-type commit-msg
+    pre-commit install
 
     # You can manually run pre-commit with
-    pre-commit run --all-files
+    pre-commit run --all-files --show-diff-on-failure
 
     # To manually run something from CI that does not run
     # locally by default, you can run:
@@ -122,6 +125,10 @@ For additional features and advanced configurations, refer to the official [MkDo
 
     Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
 
+!!! note "Install python3-dev if Python.h is missing"
+    If any of the above commands fails with `Python.h: No such file or directory`, install
+    `python3-dev` with `sudo apt install python3-dev`.
+
 !!! note
     Currently, the repository is not fully checked by `mypy`.
 
@@ -153,7 +160,7 @@ Using `-s` with `git commit` will automatically add this header.
 
 !!! tip
     You can enable automatic sign-off via your IDE:
-  
+
     - **PyCharm**: Click on the `Show Commit Options` icon to the right of the `Commit and Push...` button in the `Commit` window.
       It will bring up a `git` window where you can modify the `Author` and enable `Sign-off commit`.
     - **VSCode**: Open the [Settings editor](https://code.visualstudio.com/docs/configure/settings)

From a33ea28b1be64f0b57e9eb90389dd36715c60ecb Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 29 Jul 2025 15:51:58 -0400
Subject: [PATCH 023/224] Add `flashinfer_python` to CUDA wheel requirements
 (#21389)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 docker/Dockerfile     | 4 +++-
 requirements/cuda.txt | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index b87401c593572..0cd2cfad66fdd 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -386,6 +386,8 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 
 # Install FlashInfer from source
 ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
+# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
+# We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel.
 ARG FLASHINFER_GIT_REF="v0.2.9rc2"
 RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
   . /etc/environment
@@ -408,7 +410,7 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
         TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
             python3 -m flashinfer.aot
         TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
-            uv pip install --system --no-build-isolation .
+            uv pip install --system --no-build-isolation --force-reinstall --no-deps .
     popd
     rm -rf flashinfer
 BASH
diff --git a/requirements/cuda.txt b/requirements/cuda.txt
index c1273b224eabf..5557c868acafa 100644
--- a/requirements/cuda.txt
+++ b/requirements/cuda.txt
@@ -12,3 +12,5 @@ torchaudio==2.7.1
 torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
 xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
+# FlashInfer should be updated together with the Dockerfile
+flashinfer_python==0.2.9rc2
\ No newline at end of file

From a1873db23dd597930a7e4731a53314ace92baf49 Mon Sep 17 00:00:00 2001
From: Doug Smith <dosmith@redhat.com>
Date: Tue, 29 Jul 2025 17:45:19 -0400
Subject: [PATCH 024/224] docker: docker-aware precompiled wheel support
 (#21127)

Signed-off-by: dougbtv <dosmith@redhat.com>
---
 docker/Dockerfile | 26 +++++++++++++--------
 setup.py          | 58 +++++++++++++++++++++++++++++++++++------------
 vllm/envs.py      | 11 +++++++--
 3 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 0cd2cfad66fdd..75b5ab0230c87 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -209,16 +209,7 @@ ARG SCCACHE_REGION_NAME=us-west-2
 ARG SCCACHE_S3_NO_CREDENTIALS=0
 
 # Flag to control whether to use pre-built vLLM wheels
-ARG VLLM_USE_PRECOMPILED
-# TODO: in setup.py VLLM_USE_PRECOMPILED is sensitive to truthiness, it will take =0 as "true", this should be fixed
-ENV VLLM_USE_PRECOMPILED=""
-RUN if [ "${VLLM_USE_PRECOMPILED}" = "1" ]; then \
-        export VLLM_USE_PRECOMPILED=1 && \
-        echo "Using precompiled wheels"; \
-    else \
-        unset VLLM_USE_PRECOMPILED && \
-        echo "Leaving VLLM_USE_PRECOMPILED unset to build wheels from source"; \
-    fi
+ARG VLLM_USE_PRECOMPILED=""
 
 # if USE_SCCACHE is set, use sccache to speed up compilation
 RUN --mount=type=cache,target=/root/.cache/uv \
@@ -235,6 +226,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
         && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
         && export SCCACHE_IDLE_TIMEOUT=0 \
         && export CMAKE_BUILD_TYPE=Release \
+        && export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
+        && export VLLM_DOCKER_BUILD_CONTEXT=1 \
         && sccache --show-stats \
         && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
         && sccache --show-stats; \
@@ -248,9 +241,22 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
         # Clean any existing CMake artifacts
         rm -rf .deps && \
         mkdir -p .deps && \
+        export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
+        export VLLM_DOCKER_BUILD_CONTEXT=1 && \
         python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
     fi
 
+# When using precompiled wheels, keep only the newest manylinux1 wheel and delete others
+RUN if [ "$VLLM_USE_PRECOMPILED" = "1" ]; then \
+        echo "Cleaning up extra wheels in dist/..." && \
+        # Identify the most recent manylinux1_x86_64 wheel
+        KEEP_WHEEL=$(ls -t dist/*manylinux1_x86_64.whl 2>/dev/null | head -n1) && \
+        if [ -n "$KEEP_WHEEL" ]; then \
+            echo "Keeping wheel: $KEEP_WHEEL"; \
+            find dist/ -type f -name "*.whl" ! -path "${KEEP_WHEEL}" -delete; \
+        fi; \
+    fi
+
 # Check the size of the wheel if RUN_WHEEL_CHECK is true
 COPY .buildkite/check-wheel-size.py check-wheel-size.py
 # sync the default value with .buildkite/check-wheel-size.py
diff --git a/setup.py b/setup.py
index d46e678e7aa40..58e5833f16ae1 100644
--- a/setup.py
+++ b/setup.py
@@ -7,6 +7,7 @@ import json
 import logging
 import os
 import re
+import shutil
 import subprocess
 import sys
 from pathlib import Path
@@ -297,6 +298,10 @@ class repackage_wheel(build_ext):
             ]).decode("utf-8")
             upstream_main_commit = json.loads(resp_json)["sha"]
 
+            # In Docker build context, .git may be immutable or missing.
+            if envs.VLLM_DOCKER_BUILD_CONTEXT:
+                return upstream_main_commit
+
             # Check if the upstream_main_commit exists in the local repo
             try:
                 subprocess.check_output(
@@ -357,19 +362,48 @@ class repackage_wheel(build_ext):
             # create a temporary directory to store the wheel
             temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
             wheel_path = os.path.join(temp_dir, wheel_filename)
-
             print(f"Downloading wheel from {wheel_location} to {wheel_path}")
-
             from urllib.request import urlretrieve
-
             try:
                 urlretrieve(wheel_location, filename=wheel_path)
             except Exception as e:
                 from setuptools.errors import SetupError
-
                 raise SetupError(
                     f"Failed to get vLLM wheel from {wheel_location}") from e
 
+        # During a docker build: determine correct filename, copy wheel.
+        if envs.VLLM_DOCKER_BUILD_CONTEXT:
+            dist_dir = "/workspace/dist"
+            os.makedirs(dist_dir, exist_ok=True)
+            # Determine correct wheel filename from METADATA
+            with zipfile.ZipFile(wheel_path, "r") as z:
+                metadata_file = next(
+                    (n for n in z.namelist()
+                     if n.endswith(".dist-info/METADATA")),
+                    None,
+                )
+                if not metadata_file:
+                    raise RuntimeError(
+                        "Could not find METADATA in precompiled wheel.")
+                metadata = z.read(metadata_file).decode()
+                version_line = next((line for line in metadata.splitlines()
+                                     if line.startswith("Version: ")), None)
+                if not version_line:
+                    raise RuntimeError(
+                        "Could not determine version from METADATA.")
+                version = version_line.split(": ")[1].strip()
+
+            # Build correct filename using internal version
+            arch_tag = "cp38-abi3-manylinux1_x86_64"
+            corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
+            final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
+
+            print(f"Docker build context detected, copying precompiled wheel "
+                  f"({version}) to {final_wheel_path}")
+            shutil.copy2(wheel_path, final_wheel_path)
+            return
+
+        # Unzip the wheel when not in Docker context
         with zipfile.ZipFile(wheel_path) as wheel:
             files_to_copy = [
                 "vllm/_C.abi3.so",
@@ -378,15 +412,9 @@ class repackage_wheel(build_ext):
                 "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
                 "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
                 "vllm/cumem_allocator.abi3.so",
-                # "vllm/_version.py", # not available in nightly wheels yet
             ]
-
             file_members = list(
                 filter(lambda x: x.filename in files_to_copy, wheel.filelist))
-
-            # vllm_flash_attn python code:
-            # Regex from
-            #  `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
             compiled_regex = re.compile(
                 r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
             file_members += list(
@@ -403,11 +431,8 @@ class repackage_wheel(build_ext):
                     package_data[package_name] = []
 
                 wheel.extract(file)
-                if file_name.endswith(".py"):
-                    # python files shouldn't be added to package_data
-                    continue
-
-                package_data[package_name].append(file_name)
+                if not file_name.endswith(".py"):
+                    package_data[package_name].append(file_name)
 
 
 def _no_device() -> bool:
@@ -415,6 +440,9 @@ def _no_device() -> bool:
 
 
 def _is_cuda() -> bool:
+    # Allow forced CUDA in Docker/precompiled builds, even without torch.cuda
+    if envs.VLLM_USE_PRECOMPILED and envs.VLLM_DOCKER_BUILD_CONTEXT:
+        return True
     has_cuda = torch.version.cuda is not None
     return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
             and not (_is_neuron() or _is_tpu()))
diff --git a/vllm/envs.py b/vllm/envs.py
index fcfad4eec1621..9b6d8c8be242a 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -68,6 +68,7 @@ if TYPE_CHECKING:
     MAX_JOBS: Optional[str] = None
     NVCC_THREADS: Optional[str] = None
     VLLM_USE_PRECOMPILED: bool = False
+    VLLM_DOCKER_BUILD_CONTEXT: bool = False
     VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False
     VLLM_NO_DEPRECATION_WARNING: bool = False
     VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
@@ -222,8 +223,14 @@ environment_variables: dict[str, Callable[[], Any]] = {
 
     # If set, vllm will use precompiled binaries (*.so)
     "VLLM_USE_PRECOMPILED":
-    lambda: bool(os.environ.get("VLLM_USE_PRECOMPILED")) or bool(
-        os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
+    lambda: os.environ.get("VLLM_USE_PRECOMPILED", "").strip().lower() in
+    ("1", "true") or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
+
+    # Used to mark that setup.py is running in a Docker build context,
+    # in order to force the use of precompiled binaries.
+    "VLLM_DOCKER_BUILD_CONTEXT":
+    lambda: os.environ.get("VLLM_DOCKER_BUILD_CONTEXT", "").strip().lower() in
+    ("1", "true"),
 
     # Whether to force using nightly wheel in python build.
     # This is used for testing the nightly wheel in python build.

From 176bbce1db0ba81230c396cb46cf4035a16d2c66 Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Date: Tue, 29 Jul 2025 17:56:29 -0400
Subject: [PATCH 025/224] Revert "[AMD][CI/Build] Fix the AMD issue caused by
 inappropriate of symbol exposure (#21647)" (#21850)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 664fb6a0ee9f0..ea56b8451f228 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -243,6 +243,7 @@ set(VLLM_EXT_SRC
   "csrc/sampler.cu"
   "csrc/cuda_view.cu"
   "csrc/quantization/gptq/q_gemm.cu"
+  "csrc/quantization/compressed_tensors/int8_quant_kernels.cu"
   "csrc/quantization/fp8/common.cu"
   "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
   "csrc/quantization/gguf/gguf_kernel.cu"
@@ -296,8 +297,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     "csrc/sparse/cutlass/sparse_scaled_mm_entry.cu"
     "csrc/cutlass_extensions/common.cpp"
     "csrc/attention/mla/cutlass_mla_entry.cu"
-    "csrc/quantization/fp8/per_token_group_quant.cu"
-    "csrc/quantization/compressed_tensors/int8_quant_kernels.cu")
+    "csrc/quantization/fp8/per_token_group_quant.cu")
 
   set_gencode_flags_for_srcs(
     SRCS "${VLLM_EXT_SRC}"

From 9266d980480c8da52a0c29960e9086128e19d664 Mon Sep 17 00:00:00 2001
From: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com>
Date: Tue, 29 Jul 2025 16:34:19 -0700
Subject: [PATCH 026/224] [BugFix] Fix interleaved sliding window not set for
 Gemma3n (#21863)

Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
---
 vllm/config.py                        | 9 +++++++--
 vllm/model_executor/models/gemma3n.py | 9 +++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 86c3b9eae64cb..1dfc746e2002d 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -723,11 +723,16 @@ class ModelConfig:
         )
 
         # Workaround for Gemma 2 which uses interleaved sliding window
-        # attention, but it's not specified in its config. TODO: remove this
-        # when Gemma 2 is fixed in Transformers.
+        # attention, but it's not specified in its config.
+        # TODO: remove this when Gemma 2 config updated in HuggingFace.
         if self.hf_text_config.model_type == "gemma2":
             self.hf_text_config.sliding_window_pattern = 2
 
+        # TODO: remove this when Gemma 3n config updated in HuggingFace.
+        if self.hf_text_config.model_type == "gemma3n_text":
+            # 4 sliding window attention followed by 1 full attention
+            self.hf_text_config.sliding_window_pattern = "LLLLG"
+
         sliding_window = getattr(self.hf_text_config, "sliding_window", None)
         sliding_window_pattern = getattr(self.hf_text_config,
                                          "sliding_window_pattern", None)
diff --git a/vllm/model_executor/models/gemma3n.py b/vllm/model_executor/models/gemma3n.py
index 7d163320e0d6a..168665cc29655 100644
--- a/vllm/model_executor/models/gemma3n.py
+++ b/vllm/model_executor/models/gemma3n.py
@@ -297,8 +297,13 @@ class Gemma3nAttention(nn.Module):
                               has_weight=False)
 
         layer_idx = extract_layer_index(prefix)
-        if config.layer_types[layer_idx] == "sliding_attention":
-            self.sliding_window = config.sliding_window
+
+        is_sliding_window = (
+            getattr(config, "interleaved_sliding_window", None) is not None
+            and config.layer_types[layer_idx] == "sliding_attention")
+
+        if is_sliding_window:
+            self.sliding_window = config.interleaved_sliding_window
             rope_theta = config.rope_local_base_freq
             rope_scaling = {"rope_type": "default"}
         else:

From 0d0cc9e15001b18997207fc86af6810500d587d9 Mon Sep 17 00:00:00 2001
From: Simon Mo <simon.mo@hey.com>
Date: Tue, 29 Jul 2025 17:11:50 -0700
Subject: [PATCH 027/224] [ci] add b200 test placeholder (#21866)

Signed-off-by: simon-mo <simon.mo@hey.com>
---
 .buildkite/test-pipeline.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 6cda800b6477d..f95f038840dd2 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -643,6 +643,17 @@ steps:
     - python3 examples/offline_inference/audio_language.py --model-type whisper
     - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
 
+- label: Blackwell Test
+  working_dir: "/vllm-workspace/"
+  gpu: b200
+  # optional: true
+  source_file_dependencies:
+  - csrc/
+  - vllm/
+  commands:
+    - nvidia-smi
+    - python3 examples/offline_inference/basic/chat.py
+
 #####  1 GPU test  #####
 #####  multi gpus test  #####
 

From 452b2a3180f5003a0253de1ed369c278a6abdbe2 Mon Sep 17 00:00:00 2001
From: Simon Mo <simon.mo@hey.com>
Date: Tue, 29 Jul 2025 18:03:27 -0700
Subject: [PATCH 028/224] [ci] mark blackwell test optional for now (#21878)

---
 .buildkite/test-pipeline.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index f95f038840dd2..2bf0b6fd9a169 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -646,7 +646,7 @@ steps:
 - label: Blackwell Test
   working_dir: "/vllm-workspace/"
   gpu: b200
-  # optional: true
+  optional: true
   source_file_dependencies:
   - csrc/
   - vllm/

From 0e36abf9931baa070609376debb4fb3772f4a3fe Mon Sep 17 00:00:00 2001
From: milesial <milesial@users.noreply.github.com>
Date: Tue, 29 Jul 2025 18:16:25 -0700
Subject: [PATCH 029/224] [Bugfix] Correct max tokens for non-contiguous embeds
 (#21798)

Signed-off-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com>
Co-authored-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com>
---
 vllm/multimodal/profiling.py | 31 ++++++++++++++++++++++++++++---
 vllm/multimodal/registry.py  |  2 +-
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py
index 7f6fb47a21fa6..d96803b643ff2 100644
--- a/vllm/multimodal/profiling.py
+++ b/vllm/multimodal/profiling.py
@@ -180,11 +180,14 @@ class MultiModalProfiler(Generic[_I]):
     def _get_mm_num_tokens(
         self,
         mm_inputs: MultiModalInputs,
+        mm_embeddings_only: bool = True,
     ) -> Mapping[str, int]:
         placeholders_by_modality = mm_inputs["mm_placeholders"]
 
         return {
-            modality: sum(item.get_num_embeds() for item in placeholders)
+            modality:
+            sum(item.get_num_embeds() if mm_embeddings_only else item.length
+                for item in placeholders)
             for modality, placeholders in placeholders_by_modality.items()
         }
 
@@ -253,10 +256,11 @@ class MultiModalProfiler(Generic[_I]):
             multi_modal_placeholders=mm_inputs["mm_placeholders"],
         )
 
-    def get_mm_max_tokens(
+    def _get_mm_max_tokens(
         self,
         seq_len: int,
         mm_counts: Optional[Mapping[str, int]] = None,
+        mm_embeddings_only: bool = True,
     ) -> Mapping[str, int]:
         if mm_counts is None:
             mm_counts = self.get_mm_limits()
@@ -285,4 +289,25 @@ class MultiModalProfiler(Generic[_I]):
             return max_tokens_per_item
 
         mm_inputs = self._get_dummy_mm_inputs(seq_len, mm_counts)
-        return self._get_mm_num_tokens(mm_inputs)
+        return self._get_mm_num_tokens(mm_inputs,
+                                       mm_embeddings_only=mm_embeddings_only)
+
+    def get_mm_max_contiguous_tokens(
+        self,
+        seq_len: int,
+        mm_counts: Optional[Mapping[str, int]] = None,
+    ):
+        """
+        Returns the maximum length of the multimodal (image placeholders+text)
+        tokens, including any break/text tokens in-between image embeddings.
+
+        <im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>
+        Returns 9, even when the number of image embeddings is 6.
+        
+        This is important to take into account when profiling and
+        initializing the encoder cache size.
+        """
+
+        return self._get_mm_max_tokens(seq_len,
+                                       mm_counts,
+                                       mm_embeddings_only=False)
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index c44fcacd246c4..bfa391829d290 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -129,7 +129,7 @@ class MultiModalRegistry:
         seq_len = model_config.max_model_len
         mm_limits = self.get_mm_limits_per_prompt(model_config)
 
-        return profiler.get_mm_max_tokens(
+        return profiler.get_mm_max_contiguous_tokens(
             seq_len,
             {
                 modality: 1

From 555e7225bcb9cdf9b037ce064e48987dbc3e13a0 Mon Sep 17 00:00:00 2001
From: Chen Zhang <zhangch99@outlook.com>
Date: Tue, 29 Jul 2025 18:45:29 -0700
Subject: [PATCH 030/224] [v1][attention] Support Hybrid Allocator + FlashInfer
 (#21412)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
---
 tests/v1/attention/test_attention_backends.py | 19 ++++++-----
 tests/v1/spec_decode/test_eagle.py            |  1 +
 tests/v1/worker/test_gpu_model_runner.py      |  3 +-
 vllm/config.py                                | 32 ++++++++++++++-----
 vllm/v1/attention/backends/cpu_attn.py        |  4 +--
 vllm/v1/attention/backends/flash_attn.py      |  4 +--
 vllm/v1/attention/backends/flashinfer.py      | 18 ++++-------
 vllm/v1/attention/backends/flex_attention.py  |  4 +--
 vllm/v1/attention/backends/mamba_attn.py      |  4 +--
 vllm/v1/attention/backends/mla/common.py      |  4 ++-
 vllm/v1/attention/backends/mla/flashmla.py    |  7 ++--
 .../attention/backends/mla/rocm_aiter_mla.py  |  7 ++--
 vllm/v1/attention/backends/rocm_aiter_fa.py   |  4 +--
 vllm/v1/attention/backends/triton_attn.py     |  4 +--
 vllm/v1/attention/backends/utils.py           | 14 +++++---
 vllm/v1/worker/gpu_model_runner.py            | 13 +++++---
 16 files changed, 85 insertions(+), 57 deletions(-)

diff --git a/tests/v1/attention/test_attention_backends.py b/tests/v1/attention/test_attention_backends.py
index 9bd0b99798d77..f197cbb7bbba0 100644
--- a/tests/v1/attention/test_attention_backends.py
+++ b/tests/v1/attention/test_attention_backends.py
@@ -198,7 +198,8 @@ class MockAttentionLayer:
 
 
 def run_attention_backend(backend: _Backend, kv_cache_spec: FullAttentionSpec,
-                          vllm_config, device: torch.device,
+                          layer_names: list[str], vllm_config,
+                          device: torch.device,
                           common_attn_metadata: CommonAttentionMetadata,
                           query: torch.Tensor, key: torch.Tensor,
                           value: torch.Tensor,
@@ -211,31 +212,33 @@ def run_attention_backend(backend: _Backend, kv_cache_spec: FullAttentionSpec,
     if backend == _Backend.FLASHINFER_VLLM_V1:
         import unittest.mock
 
-        from vllm.v1.attention.backends.flashinfer import PerLayerParameters
+        from vllm.v1.attention.backends.utils import PerLayerParameters
 
-        def mock_get_per_layer_parameters(vllm_config, impl_cls):
+        def mock_get_per_layer_parameters(vllm_config, layer_names, impl_cls):
             # Return mock parameters for a single layer
             head_size = vllm_config.model_config.get_head_size()
             return {
-                "mock_layer":
+                layer_name:
                 PerLayerParameters(
                     window_left=-1,  # No sliding window
                     logits_soft_cap=0.0,  # No soft cap
                     sm_scale=1.0 / (head_size**0.5)  # Standard scale
                 )
+                for layer_name in layer_names
             }
 
         with unittest.mock.patch(
                 'vllm.v1.attention.backends.flashinfer.get_per_layer_parameters',
                 mock_get_per_layer_parameters):
-            builder = builder_cls(kv_cache_spec, vllm_config, device)
+            builder = builder_cls(kv_cache_spec, layer_names, vllm_config,
+                                  device)
             attn_metadata = builder.build(
                 common_prefix_len=0,
                 common_attn_metadata=common_attn_metadata,
             )
     else:
         # Build metadata
-        builder = builder_cls(kv_cache_spec, vllm_config, device)
+        builder = builder_cls(kv_cache_spec, layer_names, vllm_config, device)
         attn_metadata = builder.build(
             common_prefix_len=0,
             common_attn_metadata=common_attn_metadata,
@@ -427,8 +430,8 @@ def test_backend_correctness(batch_spec_name: str, model: str):
             set_kv_cache_layout("HND")
 
         backend_output = run_attention_backend(backend_name, kv_cache_spec,
-                                               vllm_config, device,
-                                               common_attn_metadata,
+                                               ["placeholder"], vllm_config,
+                                               device, common_attn_metadata,
                                                query_vllm, key_vllm,
                                                value_vllm,
                                                kv_cache_for_backend)
diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py
index da7e5e2c467dc..a126c7c943ed0 100644
--- a/tests/v1/spec_decode/test_eagle.py
+++ b/tests/v1/spec_decode/test_eagle.py
@@ -305,6 +305,7 @@ def test_propose(num_speculative_tokens):
         _Backend.FLASH_ATTN_VLLM_V1)
     attn_metadata_builder = attn_metadata_builder_cls(
         kv_cache_spec=create_standard_kv_cache_spec(proposer.vllm_config),
+        layer_names=proposer.attn_layer_names,
         vllm_config=proposer.vllm_config,
         device=device,
     )
diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py
index e14fbe1e47ecf..231dfcbb68848 100644
--- a/tests/v1/worker/test_gpu_model_runner.py
+++ b/tests/v1/worker/test_gpu_model_runner.py
@@ -745,7 +745,8 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch):
     layer_4 = "model.layers.4.mixer"
     layer_5 = "model.layers.5.mixer"
 
-    with set_current_vllm_config(vllm_config):
+    with set_current_vllm_config(vllm_config), monkeypatch.context() as m:
+        m.setenv("VLLM_ATTENTION_BACKEND", "FLASHINFER")
         hf_config = vllm_config.model_config.hf_config
         fwd_context = {}
         for key in [layer_0, layer_1]:
diff --git a/vllm/config.py b/vllm/config.py
index 1dfc746e2002d..8e8c1198833c2 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -740,8 +740,8 @@ class ModelConfig:
             isinstance(sliding_window, list))
 
         if not self.disable_sliding_window and has_interleaved_attention:
-            if (backend :=
-                    envs.VLLM_ATTENTION_BACKEND) in ("XFORMERS", "FLASHINFER"):
+            if not envs.VLLM_USE_V1 and (backend := envs.VLLM_ATTENTION_BACKEND
+                                         ) in ("XFORMERS", "FLASHINFER"):
                 sliding_window_len_min = get_min_sliding_window(
                     self.hf_text_config.sliding_window)
 
@@ -5065,13 +5065,29 @@ def assert_hashable(text):
 T = TypeVar("T")
 
 
-def get_layers_from_vllm_config(vllm_config: VllmConfig,
-                                layer_type: type[T]) -> dict[str, T]:
+def get_layers_from_vllm_config(
+        vllm_config: VllmConfig,
+        layer_type: type[T],
+        layer_names: Optional[list[str]] = None) -> dict[str, T]:
+    """
+    Get layers from the vLLM config.
+
+    Args:
+        vllm_config: The vLLM config.
+        layer_type: The type of the layer to get.
+        layer_names: The names of the layers to get. If None, return all layers.
+    """
+
+    if layer_names is None:
+        layer_names = list(
+            vllm_config.compilation_config.static_forward_context.keys())
+
+    forward_context = vllm_config.compilation_config.static_forward_context
+
     return {
-        layer_name: layer
-        for layer_name, layer in
-        vllm_config.compilation_config.static_forward_context.items()
-        if isinstance(layer, layer_type)
+        layer_name: forward_context[layer_name]
+        for layer_name in layer_names
+        if isinstance(forward_context[layer_name], layer_type)
     }
 
 
diff --git a/vllm/v1/attention/backends/cpu_attn.py b/vllm/v1/attention/backends/cpu_attn.py
index 3b6d753863d07..9ed46331863c9 100644
--- a/vllm/v1/attention/backends/cpu_attn.py
+++ b/vllm/v1/attention/backends/cpu_attn.py
@@ -315,8 +315,8 @@ class TorchSDPAMetadata(AttentionMetadata):
 
 class TorchSDPAMetadataBuilderV1(AttentionMetadataBuilder[TorchSDPAMetadata]):
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device) -> None:
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device) -> None:
         self.kv_cache_spec = kv_cache_spec
         self.vllm_config = vllm_config
         self.scheduler_config = vllm_config.scheduler_config
diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index 7c8a5e056fea5..4c2a6c6b985b2 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -148,8 +148,8 @@ class FlashAttentionMetadataBuilder(
         AttentionMetadataBuilder[FlashAttentionMetadata]):
     full_cudagraph_supported: ClassVar[bool] = get_flash_attn_version() == 3
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
         self.vllm_config = vllm_config
         self.model_config = vllm_config.model_config
         self.parallel_config = vllm_config.parallel_config
diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 775780807eae2..27552f0e7c1ef 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -21,10 +21,9 @@ from vllm.platforms import current_platform
 from vllm.utils import cdiv
 from vllm.v1.attention.backends.flash_attn import use_cascade_attention
 from vllm.v1.attention.backends.utils import (
-    AttentionMetadataBuilder, CommonAttentionMetadata, PerLayerParameters,
-    get_kv_cache_layout, get_per_layer_parameters,
-    infer_global_hyperparameters, reorder_batch_to_split_decodes_and_prefills,
-    split_decodes_and_prefills)
+    AttentionMetadataBuilder, CommonAttentionMetadata, get_kv_cache_layout,
+    get_per_layer_parameters, infer_global_hyperparameters,
+    reorder_batch_to_split_decodes_and_prefills, split_decodes_and_prefills)
 from vllm.v1.kv_cache_interface import AttentionSpec
 
 if TYPE_CHECKING:
@@ -219,8 +218,8 @@ class FlashInferMetadata:
 
 class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
         self.device = device
         self._workspace_buffer = None
         self._prefill_wrapper = None  # Wrapper for prefill/append
@@ -228,7 +227,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
         self._cascade_wrapper = None  # Wrapper for cascade attention
 
         # Global hyperparameters shared by all attention layers
-        self.global_hyperparameters: Optional[PerLayerParameters] = None
+        self.global_hyperparameters = infer_global_hyperparameters(
+            get_per_layer_parameters(vllm_config, layer_names, FlashInferImpl))
 
         self.vllm_config = vllm_config
         self.cache_config = vllm_config.cache_config
@@ -283,10 +283,6 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
 
     def _plan(self, num_prefills: int, num_decodes: int,
               attn_metadata: FlashInferMetadata):
-        if self.global_hyperparameters is None:
-            self.global_hyperparameters = infer_global_hyperparameters(
-                get_per_layer_parameters(self.vllm_config, FlashInferImpl))
-
         if attn_metadata.use_cascade:
             attn_metadata.cascade_wrapper = self._get_cascade_wrapper()
             attn_metadata.cascade_wrapper.plan(
diff --git a/vllm/v1/attention/backends/flex_attention.py b/vllm/v1/attention/backends/flex_attention.py
index ad63f92cd88a7..bb0d890c7754d 100644
--- a/vllm/v1/attention/backends/flex_attention.py
+++ b/vllm/v1/attention/backends/flex_attention.py
@@ -258,8 +258,8 @@ class FlexAttentionMetadata:
 class FlexAttentionMetadataBuilder(
         AttentionMetadataBuilder[FlexAttentionMetadata]):
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
         self.model_config = vllm_config.model_config
         self.parallel_config = vllm_config.parallel_config
         self.cache_config = vllm_config.cache_config
diff --git a/vllm/v1/attention/backends/mamba_attn.py b/vllm/v1/attention/backends/mamba_attn.py
index dca5de46c0653..8b702e28d67c0 100644
--- a/vllm/v1/attention/backends/mamba_attn.py
+++ b/vllm/v1/attention/backends/mamba_attn.py
@@ -87,8 +87,8 @@ class Mamba2AttentionMetadata:
 class Mamba2AttentionMetadataBuilder(
         AttentionMetadataBuilder[Mamba2AttentionMetadata]):
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
         assert isinstance(kv_cache_spec, MambaSpec)
         self.kv_cache_spec = kv_cache_spec
         self.chunk_size = vllm_config.model_config.get_mamba_chunk_size()
diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py
index cf17d93302395..0095d75217856 100755
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -406,6 +406,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
 
     def __init__(self,
                  kv_cache_spec: AttentionSpec,
+                 layer_names: list[str],
                  vllm_config: VllmConfig,
                  device: torch.device,
                  metadata_cls: Optional[type[M]] = None):
@@ -471,7 +472,8 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
                 BatchPrefillWithRaggedKVCacheWrapper] = []
 
             self._global_hyperparameters = infer_global_hyperparameters(
-                get_per_layer_parameters(vllm_config, MLACommonImpl))
+                get_per_layer_parameters(vllm_config, layer_names,
+                                         MLACommonImpl))
 
         if self._use_cudnn_prefill:
             self.cudnn_workspace = torch.empty(
diff --git a/vllm/v1/attention/backends/mla/flashmla.py b/vllm/v1/attention/backends/mla/flashmla.py
index d3e5300dbbd6b..39463b9c06164 100644
--- a/vllm/v1/attention/backends/mla/flashmla.py
+++ b/vllm/v1/attention/backends/mla/flashmla.py
@@ -56,9 +56,10 @@ class FlashMLAMetadata(MLACommonMetadata[FlashMLADecodeMetadata]):
 class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
     full_cudagraph_supported: ClassVar[bool] = True  # Decode-only
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
-        super().__init__(kv_cache_spec, vllm_config, device, FlashMLAMetadata)
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
+        super().__init__(kv_cache_spec, layer_names, vllm_config, device,
+                         FlashMLAMetadata)
 
         self.compilation_config = vllm_config.compilation_config
         self.num_q_heads = vllm_config.model_config.get_num_attention_heads(
diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
index 834c234558350..5c5891f035ae2 100644
--- a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
+++ b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
@@ -66,9 +66,10 @@ class AiterMLAMetadata(MLACommonMetadata[AiterMLADecodeMetadata]):
 class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
     full_cudagraph_supported: ClassVar[bool] = True  # decode only
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
-        super().__init__(kv_cache_spec, vllm_config, device, AiterMLAMetadata)
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
+        super().__init__(kv_cache_spec, layer_names, vllm_config, device,
+                         AiterMLAMetadata)
         assert self.kv_cache_spec.block_size == 1, "AITER MLA" \
             "only supports block size 1."
 
diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index 85a5dc8c91c13..dd10b7f02730a 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -231,8 +231,8 @@ class AiterFlashAttentionMetadataBuilder(
         AttentionMetadataBuilder[AiterFlashAttentionMetadata]):
     full_cudagraph_supported: ClassVar[bool] = True
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
         self.vllm_config = vllm_config
         self.model_config = vllm_config.model_config
         self.parallel_config = vllm_config.parallel_config
diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py
index 83471ca51b73f..195fbd3b1b9c4 100644
--- a/vllm/v1/attention/backends/triton_attn.py
+++ b/vllm/v1/attention/backends/triton_attn.py
@@ -59,8 +59,8 @@ class TritonAttentionMetadataBuilder(
         AttentionMetadataBuilder[TritonAttentionMetadata]):
     full_cudagraph_supported: ClassVar[bool] = True
 
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
         self.device = device
         self.block_size = kv_cache_spec.block_size
         self.kv_cache_spec = kv_cache_spec
diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index b13362f8a8d8d..d1599ba10b618 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -70,8 +70,8 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
     full_cudagraph_supported: ClassVar[bool] = False
 
     @abstractmethod
-    def __init__(self, kv_cache_spec: AttentionSpec, vllm_config: VllmConfig,
-                 device: torch.device):
+    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
+                 vllm_config: VllmConfig, device: torch.device):
         self.kv_cache_spec = kv_cache_spec
 
     @abstractmethod
@@ -164,14 +164,14 @@ class PerLayerParameters:
 
 
 def get_per_layer_parameters(
-        vllm_config: VllmConfig,
+        vllm_config: VllmConfig, layer_names: list[str],
         cls_: type['AttentionImpl']) -> dict[str, PerLayerParameters]:
     """
-    Scan all attention layers and determine some hyperparameters
+    Scan layers in `layer_names` and determine some hyperparameters
     to use during `plan`.
     """
 
-    layers = get_layers_from_vllm_config(vllm_config, Attention)
+    layers = get_layers_from_vllm_config(vllm_config, Attention, layer_names)
     per_layer_params: dict[str, PerLayerParameters] = {}
 
     for key, layer in layers.items():
@@ -208,6 +208,10 @@ def infer_global_hyperparameters(
     param_sets = list(per_layer_params.values())
     global_params = param_sets[0]
     for params in param_sets:
+        if params.window_left != global_params.window_left:
+            raise ValueError(
+                "Window left is not the same for all layers. One potential fix "
+                "is to set disable_sliding_window=True")
         assert params == global_params, (
             "FlashInfer backend currently only supports models in which all "
             "layers share the same values for the following hyperparameters: "
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 84ad582c9c9de..3befb6adf2753 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2521,7 +2521,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                     elapsed_time, cuda_graph_size / (1 << 30))
 
     def _initialize_single_attn_backend(
-        self, kv_cache_spec: KVCacheSpec
+        self, kv_cache_spec: KVCacheSpec, layer_names: list[str]
     ) -> tuple[AttentionBackend, AttentionMetadataBuilder]:
         if isinstance(kv_cache_spec, AttentionSpec):
             attn_backend_i = get_attn_backend(
@@ -2551,6 +2551,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
 
         attn_metadata_builder_i = attn_backend_i.get_builder_cls()(
             kv_cache_spec,
+            layer_names,
             self.vllm_config,
             self.device,
         )
@@ -2574,8 +2575,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 kv_cache_config.kv_cache_groups):
             kv_cache_spec = kv_cache_group_spec.kv_cache_spec
 
-            attn_backend_i, attn_metadata_builder_i = \
-                self._initialize_single_attn_backend(kv_cache_spec)
+            attn_backend_i, attn_metadata_builder_i = (
+                self._initialize_single_attn_backend(
+                    kv_cache_spec, kv_cache_group_spec.layer_names))
             self.attn_backends.append(attn_backend_i)
             self.attn_metadata_builders.append(attn_metadata_builder_i)
 
@@ -2606,8 +2608,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             assert len(attn_specs) == len(attn_layers), \
                 "All or none of the layers are expected to be encoder-only"
 
-            attn_backend, attn_metadata_builder = \
-                self._initialize_single_attn_backend(attn_specs[0])
+            attn_backend, attn_metadata_builder = (
+                self._initialize_single_attn_backend(attn_specs[0],
+                                                     attn_layers.keys()))
             self.attn_backends.append(attn_backend)
             self.attn_metadata_builders.append(attn_metadata_builder)
             self.is_encoder_only_model = True

From ba5c5e5404d2d3fdee02e163fc75a44bd960935f Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Wed, 30 Jul 2025 03:45:08 +0100
Subject: [PATCH 031/224] [Docs] Switch to better markdown linting pre-commit
 hook (#21851)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .buildkite/nightly-benchmarks/README.md       |  5 +
 .../nightly-benchmarks/nightly-annotation.md  | 19 ++--
 .../nightly-descriptions.md                   | 34 +++----
 .../performance-benchmarks-descriptions.md    |  1 +
 .github/PULL_REQUEST_TEMPLATE.md              |  4 +-
 .markdownlint.yaml                            | 13 +++
 .pre-commit-config.yaml                       |  7 +-
 README.md                                     |  7 ++
 RELEASE.md                                    |  5 +-
 benchmarks/README.md                          | 99 +++++++++++--------
 benchmarks/auto_tune/README.md                |  8 +-
 benchmarks/kernels/deepgemm/README.md         |  4 +-
 csrc/quantization/cutlass_w8a8/Epilogues.md   |  5 +-
 docs/cli/README.md                            |  4 +-
 docs/configuration/tpu.md                     | 15 ++-
 docs/contributing/ci/failures.md              |  8 +-
 .../contributing/ci/update_pytorch_version.md |  4 +-
 docs/contributing/deprecation_policy.md       |  6 +-
 docs/contributing/profiling.md                |  4 +-
 docs/contributing/vulnerability_management.md |  6 +-
 docs/deployment/frameworks/anything-llm.md    | 12 +--
 docs/deployment/frameworks/chatbox.md         | 10 +-
 docs/deployment/frameworks/dify.md            | 10 +-
 docs/deployment/frameworks/haystack.md        |  2 -
 .../retrieval_augmented_generation.md         |  1 +
 .../integrations/production-stack.md          |  9 +-
 docs/deployment/k8s.md                        |  2 +-
 docs/design/metrics.md                        |  4 +-
 docs/design/p2p_nccl_connector.md             |  4 +-
 docs/design/prefix_caching.md                 | 11 ++-
 docs/design/torch_compile.md                  |  6 +-
 docs/features/compatibility_matrix.md         |  6 +-
 docs/features/lora.md                         |  2 +
 docs/features/multimodal_inputs.md            |  2 +
 docs/features/quantization/auto_round.md      |  2 +-
 docs/features/quantization/int4.md            |  4 +-
 .../quantization/quantized_kvcache.md         |  1 +
 docs/features/quantization/quark.md           |  1 +
 docs/features/quantization/torchao.md         |  1 +
 docs/getting_started/installation/cpu.md      |  6 +-
 .../installation/intel_gaudi.md               |  8 +-
 docs/models/hardware_supported_models/tpu.md  |  5 +-
 docs/models/supported_models.md               | 14 +--
 docs/serving/distributed_serving.md           |  2 +-
 docs/serving/expert_parallel_deployment.md    |  3 +-
 docs/serving/openai_compatible_server.md      |  1 +
 docs/usage/security.md                        | 32 +++---
 docs/usage/v1_guide.md                        | 10 +-
 .../disaggregated-prefill-v1/README.md        |  2 +-
 .../offline_inference/openai_batch/README.md  |  8 +-
 examples/others/lmcache/README.md             |  4 +
 examples/others/logging_configuration.md      |  6 +-
 pyproject.toml                                | 10 --
 tools/ep_kernels/README.md                    |  9 +-
 vllm/plugins/lora_resolvers/README.md         |  3 +-
 55 files changed, 273 insertions(+), 198 deletions(-)
 create mode 100644 .markdownlint.yaml

diff --git a/.buildkite/nightly-benchmarks/README.md b/.buildkite/nightly-benchmarks/README.md
index ae42f70077cec..fcde284efea98 100644
--- a/.buildkite/nightly-benchmarks/README.md
+++ b/.buildkite/nightly-benchmarks/README.md
@@ -28,6 +28,7 @@ See [vLLM performance dashboard](https://perf.vllm.ai) for the latest performanc
 ## Trigger the benchmark
 
 Performance benchmark will be triggered when:
+
 - A PR being merged into vllm.
 - Every commit for those PRs with `perf-benchmarks` label AND `ready` label.
 
@@ -38,6 +39,7 @@ bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
 ```
 
 Runtime environment variables:
+
 - `ON_CPU`: set the value to '1' on Intel® Xeon® Processors. Default value is 0.
 - `SERVING_JSON`: JSON file to use for the serving tests. Default value is empty string (use default file).
 - `LATENCY_JSON`: JSON file to use for the latency tests. Default value is empty string (use default file).
@@ -46,12 +48,14 @@ Runtime environment variables:
 - `REMOTE_PORT`: Port for the remote vLLM service to benchmark. Default value is empty string.
 
 Nightly benchmark will be triggered when:
+
 - Every commit for those PRs with `perf-benchmarks` label and `nightly-benchmarks` label.
 
 ## Performance benchmark details
 
 See [performance-benchmarks-descriptions.md](performance-benchmarks-descriptions.md) for detailed descriptions, and use `tests/latency-tests.json`, `tests/throughput-tests.json`, `tests/serving-tests.json` to configure the test cases.
 > NOTE: For Intel® Xeon® Processors, use `tests/latency-tests-cpu.json`, `tests/throughput-tests-cpu.json`, `tests/serving-tests-cpu.json` instead.
+>
 ### Latency test
 
 Here is an example of one test inside `latency-tests.json`:
@@ -149,6 +153,7 @@ Here is an example using the script to compare result_a and result_b without det
 
 Here is an example using the script to compare result_a and result_b with detail test name.
 `python3 compare-json-results.py -f results_a/benchmark_results.json -f results_b/benchmark_results.json`
+
 |   | results_a/benchmark_results.json_name | results_a/benchmark_results.json | results_b/benchmark_results.json_name | results_b/benchmark_results.json | perf_ratio        |
 |---|---------------------------------------------|----------------------------------------|---------------------------------------------|----------------------------------------|----------|
 | 0 | serving_llama8B_tp1_sharegpt_qps_1          | 142.633982                             | serving_llama8B_tp1_sharegpt_qps_1          | 156.526018                             | 1.097396 |
diff --git a/.buildkite/nightly-benchmarks/nightly-annotation.md b/.buildkite/nightly-benchmarks/nightly-annotation.md
index ef11c040057c8..466def07b6f1f 100644
--- a/.buildkite/nightly-benchmarks/nightly-annotation.md
+++ b/.buildkite/nightly-benchmarks/nightly-annotation.md
@@ -1,3 +1,4 @@
+# Nightly benchmark annotation
 
 ## Description
 
@@ -13,15 +14,15 @@ Please download the visualization scripts in the post
 
 - Find the docker we use in `benchmarking pipeline`
 - Deploy the docker, and inside the docker:
-  - Download `nightly-benchmarks.zip`.
-  - In the same folder, run the following code:
+    - Download `nightly-benchmarks.zip`.
+    - In the same folder, run the following code:
 
-  ```bash
-  export HF_TOKEN=<your HF token>
-  apt update
-  apt install -y git
-  unzip nightly-benchmarks.zip
-  VLLM_SOURCE_CODE_LOC=./ bash .buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
-  ```
+    ```bash
+    export HF_TOKEN=<your HF token>
+    apt update
+    apt install -y git
+    unzip nightly-benchmarks.zip
+    VLLM_SOURCE_CODE_LOC=./ bash .buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
+    ```
 
 And the results will be inside `./benchmarks/results`.
diff --git a/.buildkite/nightly-benchmarks/nightly-descriptions.md b/.buildkite/nightly-benchmarks/nightly-descriptions.md
index 5f003f42f07c0..8afde017d383e 100644
--- a/.buildkite/nightly-benchmarks/nightly-descriptions.md
+++ b/.buildkite/nightly-benchmarks/nightly-descriptions.md
@@ -13,25 +13,25 @@ Latest reproduction guilde: [github issue link](https://github.com/vllm-project/
 ## Setup
 
 - Docker images:
-  - vLLM: `vllm/vllm-openai:v0.6.2`
-  - SGLang: `lmsysorg/sglang:v0.3.2-cu121`
-  - LMDeploy: `openmmlab/lmdeploy:v0.6.1-cu12`
-  - TensorRT-LLM: `nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3`
-    - *NOTE: we uses r24.07 as the current implementation only works for this version. We are going to bump this up.*
-  - Check [nightly-pipeline.yaml](nightly-pipeline.yaml) for the concrete docker images, specs and commands we use for the benchmark.
+    - vLLM: `vllm/vllm-openai:v0.6.2`
+    - SGLang: `lmsysorg/sglang:v0.3.2-cu121`
+    - LMDeploy: `openmmlab/lmdeploy:v0.6.1-cu12`
+    - TensorRT-LLM: `nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3`
+        - *NOTE: we use r24.07 as the current implementation only works for this version. We are going to bump this up.*
+    - Check [nightly-pipeline.yaml](nightly-pipeline.yaml) for the concrete docker images, specs and commands we use for the benchmark.
 - Hardware
-  - 8x Nvidia A100 GPUs
+    - 8x Nvidia A100 GPUs
 - Workload:
-  - Dataset
-    - ShareGPT dataset
-    - Prefill-heavy dataset (in average 462 input tokens, 16 tokens as output)
-    - Decode-heavy dataset (in average 462 input tokens, 256 output tokens)
-    - Check [nightly-tests.json](tests/nightly-tests.json) for the concrete configuration of datasets we use.
-  - Models: llama-3 8B, llama-3 70B.
-    - We do not use llama 3.1 as it is incompatible with trt-llm r24.07. ([issue](https://github.com/NVIDIA/TensorRT-LLM/issues/2105)).
-  - Average QPS (query per second): 2, 4, 8, 16, 32 and inf.
-    - Queries are randomly sampled, and arrival patterns are determined via Poisson process, but all with fixed random seed.
-  - Evaluation metrics: Throughput (higher the better), TTFT (time to the first token, lower the better), ITL (inter-token latency, lower the better).
+    - Dataset
+        - ShareGPT dataset
+        - Prefill-heavy dataset (on average 462 input tokens, 16 tokens as output)
+        - Decode-heavy dataset (on average 462 input tokens, 256 output tokens)
+        - Check [nightly-tests.json](tests/nightly-tests.json) for the concrete configuration of datasets we use.
+    - Models: llama-3 8B, llama-3 70B.
+        - We do not use llama 3.1 as it is incompatible with trt-llm r24.07. ([issue](https://github.com/NVIDIA/TensorRT-LLM/issues/2105)).
+    - Average QPS (query per second): 2, 4, 8, 16, 32 and inf.
+        - Queries are randomly sampled, and arrival patterns are determined via Poisson process, but all with fixed random seed.
+    - Evaluation metrics: Throughput (higher the better), TTFT (time to the first token, lower the better), ITL (inter-token latency, lower the better).
 
 ## Known issues
 
diff --git a/.buildkite/nightly-benchmarks/performance-benchmarks-descriptions.md b/.buildkite/nightly-benchmarks/performance-benchmarks-descriptions.md
index a1f8441ccdac8..8bb16bd3cf373 100644
--- a/.buildkite/nightly-benchmarks/performance-benchmarks-descriptions.md
+++ b/.buildkite/nightly-benchmarks/performance-benchmarks-descriptions.md
@@ -1,3 +1,4 @@
+# Performance benchmarks descriptions
 
 ## Latency tests
 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 017ec7ca82da7..d4aceab4472fa 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,4 +1,5 @@
-## Essential Elements of an Effective PR Description Checklist
+# Essential Elements of an Effective PR Description Checklist
+
 - [ ] The purpose of the PR, such as "Fix some issue (link existing issues this PR will resolve)".
 - [ ] The test plan, such as providing test command.
 - [ ] The test results, such as pasting the results comparison before and after, or e2e results
@@ -14,5 +15,4 @@ PLEASE FILL IN THE PR DESCRIPTION HERE ENSURING ALL CHECKLIST ITEMS ABOVE HAVE B
 
 ## (Optional) Documentation Update
 
-<!--- pyml disable-next-line no-emphasis-as-heading -->
 **BEFORE SUBMITTING, PLEASE READ <https://docs.vllm.ai/en/latest/contributing>** (anything written below this line will be removed by GitHub Actions)
diff --git a/.markdownlint.yaml b/.markdownlint.yaml
new file mode 100644
index 0000000000000..c86fed9555d62
--- /dev/null
+++ b/.markdownlint.yaml
@@ -0,0 +1,13 @@
+MD007:
+  indent: 4
+MD013: false
+MD024:
+  siblings_only: true
+MD033: false
+MD042: false
+MD045: false
+MD046: false
+MD051: false
+MD052: false
+MD053: false
+MD059: false
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5197820fb4020..045096cb86369 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -35,12 +35,11 @@ repos:
     exclude: 'csrc/(moe/topk_softmax_kernels.cu|quantization/gguf/(ggml-common.h|dequantize.cuh|vecdotq.cuh|mmq.cuh|mmvq.cuh))|vllm/third_party/.*'
     types_or: [c++, cuda]
     args: [--style=file, --verbose]
-- repo: https://github.com/jackdewinter/pymarkdown
-  rev: v0.9.29
+- repo: https://github.com/igorshubovych/markdownlint-cli
+  rev: v0.45.0
   hooks:
-  - id: pymarkdown
+  - id: markdownlint-fix
     exclude: '.*\.inc\.md'
-    args: [fix]
 - repo: https://github.com/rhysd/actionlint
   rev: v1.7.7
   hooks:
diff --git a/README.md b/README.md
index dc2f0afbe3538..5348405b72d2c 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+<!-- markdownlint-disable MD001 MD041 -->
 <p align="center">
   <picture>
     <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/vllm-project/vllm/main/docs/assets/logos/vllm-logo-text-dark.png">
@@ -16,6 +17,7 @@ Easy, fast, and cheap LLM serving for everyone
 ---
 
 *Latest News* 🔥
+
 - [2025/05] We hosted [NYC vLLM Meetup](https://lu.ma/c1rqyf1f)! Please find the meetup slides [here](https://docs.google.com/presentation/d/1_q_aW_ioMJWUImf1s1YM-ZhjXz8cUeL0IJvaquOYBeA/edit?usp=sharing).
 - [2025/05] vLLM is now a hosted project under PyTorch Foundation! Please find the announcement [here](https://pytorch.org/blog/pytorch-foundation-welcomes-vllm/).
 - [2025/04] We hosted [Asia Developer Day](https://www.sginnovate.com/event/limited-availability-morning-evening-slots-remaining-inaugural-vllm-asia-developer-day)! Please find the meetup slides from the vLLM team [here](https://docs.google.com/presentation/d/19cp6Qu8u48ihB91A064XfaXruNYiBOUKrBxAmDOllOo/edit?usp=sharing).
@@ -46,6 +48,7 @@ Easy, fast, and cheap LLM serving for everyone
 </details>
 
 ---
+
 ## About
 
 vLLM is a fast and easy-to-use library for LLM inference and serving.
@@ -75,6 +78,7 @@ vLLM is flexible and easy to use with:
 - Multi-LoRA support
 
 vLLM seamlessly supports most popular open-source models on HuggingFace, including:
+
 - Transformer-like LLMs (e.g., Llama)
 - Mixture-of-Expert LLMs (e.g., Mixtral, Deepseek-V2 and V3)
 - Embedding Models (e.g., E5-Mistral)
@@ -91,6 +95,7 @@ pip install vllm
 ```
 
 Visit our [documentation](https://docs.vllm.ai/en/latest/) to learn more.
+
 - [Installation](https://docs.vllm.ai/en/latest/getting_started/installation.html)
 - [Quickstart](https://docs.vllm.ai/en/latest/getting_started/quickstart.html)
 - [List of Supported Models](https://docs.vllm.ai/en/latest/models/supported_models.html)
@@ -107,6 +112,7 @@ vLLM is a community project. Our compute resources for development and testing a
 <!-- Note: Please sort them in alphabetical order. -->
 <!-- Note: Please keep these consistent with docs/community/sponsors.md -->
 Cash Donations:
+
 - a16z
 - Dropbox
 - Sequoia Capital
@@ -114,6 +120,7 @@ Cash Donations:
 - ZhenFund
 
 Compute Resources:
+
 - AMD
 - Anyscale
 - AWS
diff --git a/RELEASE.md b/RELEASE.md
index 9352e7ef706c6..db0d51afc7be1 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -60,9 +60,10 @@ Please note: **No feature work allowed for cherry picks**. All PRs that are cons
 Before each release, we perform end-to-end performance validation to ensure no regressions are introduced. This validation uses the [vllm-benchmark workflow](https://github.com/pytorch/pytorch-integration-testing/actions/workflows/vllm-benchmark.yml) on PyTorch CI.
 
 **Current Coverage:**
+
 * Models: Llama3, Llama4, and Mixtral
 * Hardware: NVIDIA H100 and AMD MI300x
-* *Note: Coverage may change based on new model releases and hardware availability*
+* _Note: Coverage may change based on new model releases and hardware availability_
 
 **Performance Validation Process:**
 
@@ -71,11 +72,13 @@ Request write access to the [pytorch/pytorch-integration-testing](https://github
 
 **Step 2: Review Benchmark Setup**
 Familiarize yourself with the benchmark configurations:
+
 * [CUDA setup](https://github.com/pytorch/pytorch-integration-testing/tree/main/vllm-benchmarks/benchmarks/cuda)
 * [ROCm setup](https://github.com/pytorch/pytorch-integration-testing/tree/main/vllm-benchmarks/benchmarks/rocm)
 
 **Step 3: Run the Benchmark**
 Navigate to the [vllm-benchmark workflow](https://github.com/pytorch/pytorch-integration-testing/actions/workflows/vllm-benchmark.yml) and configure:
+
 * **vLLM branch**: Set to the release branch (e.g., `releases/v0.9.2`)
 * **vLLM commit**: Set to the RC commit hash
 
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 3b10963c3e014..644517235b122 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -4,7 +4,7 @@ This README guides you through running benchmark tests with the extensive
 datasets supported on vLLM. It’s a living document, updated as new features and datasets
 become available.
 
-**Dataset Overview**
+## Dataset Overview
 
 <table style="width:100%; border-collapse: collapse;">
   <thead>
@@ -81,9 +81,10 @@ become available.
 
 **Note**: HuggingFace dataset's `dataset-name` should be set to `hf`
 
----
+## 🚀 Example - Online Benchmark
+
 <details>
-<summary><b>🚀 Example - Online Benchmark</b></summary>
+<summary>Show more</summary>
 
 <br/>
 
@@ -109,7 +110,7 @@ vllm bench serve \
 
 If successful, you will see the following output
 
-```
+```text
 ============ Serving Benchmark Result ============
 Successful requests:                     10
 Benchmark duration (s):                  5.78
@@ -133,11 +134,11 @@ P99 ITL (ms):                            8.39
 ==================================================
 ```
 
-**Custom Dataset**
+### Custom Dataset
 
 If the dataset you want to benchmark is not supported yet in vLLM, even then you can benchmark on it using `CustomDataset`. Your data needs to be in `.jsonl` format and needs to have "prompt" field per entry, e.g., data.jsonl
 
-```
+```json
 {"prompt": "What is the capital of India?"}
 {"prompt": "What is the capital of Iran?"}
 {"prompt": "What is the capital of China?"}
@@ -166,7 +167,7 @@ vllm bench serve --port 9001 --save-result --save-detailed \
 
 You can skip applying chat template if your data already has it by using `--custom-skip-chat-template`.
 
-**VisionArena Benchmark for Vision Language Models**
+### VisionArena Benchmark for Vision Language Models
 
 ```bash
 # need a model with vision capability here
@@ -184,7 +185,7 @@ vllm bench serve \
   --num-prompts 1000
 ```
 
-**InstructCoder Benchmark with Speculative Decoding**
+### InstructCoder Benchmark with Speculative Decoding
 
 ``` bash
 VLLM_USE_V1=1 vllm serve meta-llama/Meta-Llama-3-8B-Instruct \
@@ -201,13 +202,13 @@ vllm bench serve \
     --num-prompts 2048
 ```
 
-**Other HuggingFaceDataset Examples**
+### Other HuggingFaceDataset Examples
 
 ```bash
 vllm serve Qwen/Qwen2-VL-7B-Instruct --disable-log-requests
 ```
 
-**`lmms-lab/LLaVA-OneVision-Data`**
+`lmms-lab/LLaVA-OneVision-Data`:
 
 ```bash
 vllm bench serve \
@@ -221,7 +222,7 @@ vllm bench serve \
   --num-prompts 10
 ```
 
-**`Aeala/ShareGPT_Vicuna_unfiltered`**
+`Aeala/ShareGPT_Vicuna_unfiltered`:
 
 ```bash
 vllm bench serve \
@@ -234,7 +235,7 @@ vllm bench serve \
   --num-prompts 10
 ```
 
-**`AI-MO/aimo-validation-aime`**
+`AI-MO/aimo-validation-aime`:
 
 ``` bash
 vllm bench serve \
@@ -245,7 +246,7 @@ vllm bench serve \
     --seed 42
 ```
 
-**`philschmid/mt-bench`**
+`philschmid/mt-bench`:
 
 ``` bash
 vllm bench serve \
@@ -255,7 +256,7 @@ vllm bench serve \
     --num-prompts 80
 ```
 
-**Running With Sampling Parameters**
+### Running With Sampling Parameters
 
 When using OpenAI-compatible backends such as `vllm`, optional sampling
 parameters can be specified. Example client command:
@@ -273,25 +274,29 @@ vllm bench serve \
   --num-prompts 10
 ```
 
-**Running With Ramp-Up Request Rate**
+### Running With Ramp-Up Request Rate
 
 The benchmark tool also supports ramping up the request rate over the
 duration of the benchmark run. This can be useful for stress testing the
 server or finding the maximum throughput that it can handle, given some latency budget.
 
 Two ramp-up strategies are supported:
+
 - `linear`: Increases the request rate linearly from a start value to an end value.
 - `exponential`: Increases the request rate exponentially.
 
 The following arguments can be used to control the ramp-up:
+
 - `--ramp-up-strategy`: The ramp-up strategy to use (`linear` or `exponential`).
 - `--ramp-up-start-rps`: The request rate at the beginning of the benchmark.
 - `--ramp-up-end-rps`: The request rate at the end of the benchmark.
 
 </details>
 
+## 📈 Example - Offline Throughput Benchmark
+
 <details>
-<summary><b>📈 Example - Offline Throughput Benchmark</b></summary>
+<summary>Show more</summary>
 
 <br/>
 
@@ -305,15 +310,15 @@ vllm bench throughput \
 
 If successful, you will see the following output
 
-```
+```text
 Throughput: 7.15 requests/s, 4656.00 total tokens/s, 1072.15 output tokens/s
 Total num prompt tokens:  5014
 Total num output tokens:  1500
 ```
 
-**VisionArena Benchmark for Vision Language Models**
+### VisionArena Benchmark for Vision Language Models
 
-``` bash
+```bash
 vllm bench throughput \
   --model Qwen/Qwen2-VL-7B-Instruct \
   --backend vllm-chat \
@@ -325,13 +330,13 @@ vllm bench throughput \
 
 The `num prompt tokens` now includes image token counts
 
-```
+```text
 Throughput: 2.55 requests/s, 4036.92 total tokens/s, 326.90 output tokens/s
 Total num prompt tokens:  14527
 Total num output tokens:  1280
 ```
 
-**InstructCoder Benchmark with Speculative Decoding**
+### InstructCoder Benchmark with Speculative Decoding
 
 ``` bash
 VLLM_WORKER_MULTIPROC_METHOD=spawn \
@@ -349,15 +354,15 @@ vllm bench throughput \
     "prompt_lookup_min": 2}'
 ```
 
-```
+```text
 Throughput: 104.77 requests/s, 23836.22 total tokens/s, 10477.10 output tokens/s
 Total num prompt tokens:  261136
 Total num output tokens:  204800
 ```
 
-**Other HuggingFaceDataset Examples**
+### Other HuggingFaceDataset Examples
 
-**`lmms-lab/LLaVA-OneVision-Data`**
+`lmms-lab/LLaVA-OneVision-Data`:
 
 ```bash
 vllm bench throughput \
@@ -370,7 +375,7 @@ vllm bench throughput \
   --num-prompts 10
 ```
 
-**`Aeala/ShareGPT_Vicuna_unfiltered`**
+`Aeala/ShareGPT_Vicuna_unfiltered`:
 
 ```bash
 vllm bench throughput \
@@ -382,7 +387,7 @@ vllm bench throughput \
   --num-prompts 10
 ```
 
-**`AI-MO/aimo-validation-aime`**
+`AI-MO/aimo-validation-aime`:
 
 ```bash
 vllm bench throughput \
@@ -394,7 +399,7 @@ vllm bench throughput \
   --num-prompts 10
 ```
 
-**Benchmark with LoRA Adapters**
+Benchmark with LoRA adapters:
 
 ``` bash
 # download dataset
@@ -413,20 +418,22 @@ vllm bench throughput \
 
 </details>
 
+## 🛠️ Example - Structured Output Benchmark
+
 <details>
-<summary><b>🛠️ Example - Structured Output Benchmark</b></summary>
+<summary>Show more</summary>
 
 <br/>
 
 Benchmark the performance of structured output generation (JSON, grammar, regex).
 
-**Server Setup**
+### Server Setup
 
 ```bash
 vllm serve NousResearch/Hermes-3-Llama-3.1-8B --disable-log-requests
 ```
 
-**JSON Schema Benchmark**
+### JSON Schema Benchmark
 
 ```bash
 python3 benchmarks/benchmark_serving_structured_output.py \
@@ -438,7 +445,7 @@ python3 benchmarks/benchmark_serving_structured_output.py \
   --num-prompts 1000
 ```
 
-**Grammar-based Generation Benchmark**
+### Grammar-based Generation Benchmark
 
 ```bash
 python3 benchmarks/benchmark_serving_structured_output.py \
@@ -450,7 +457,7 @@ python3 benchmarks/benchmark_serving_structured_output.py \
   --num-prompts 1000
 ```
 
-**Regex-based Generation Benchmark**
+### Regex-based Generation Benchmark
 
 ```bash
 python3 benchmarks/benchmark_serving_structured_output.py \
@@ -461,7 +468,7 @@ python3 benchmarks/benchmark_serving_structured_output.py \
   --num-prompts 1000
 ```
 
-**Choice-based Generation Benchmark**
+### Choice-based Generation Benchmark
 
 ```bash
 python3 benchmarks/benchmark_serving_structured_output.py \
@@ -472,7 +479,7 @@ python3 benchmarks/benchmark_serving_structured_output.py \
   --num-prompts 1000
 ```
 
-**XGrammar Benchmark Dataset**
+### XGrammar Benchmark Dataset
 
 ```bash
 python3 benchmarks/benchmark_serving_structured_output.py \
@@ -485,14 +492,16 @@ python3 benchmarks/benchmark_serving_structured_output.py \
 
 </details>
 
+## 📚 Example - Long Document QA Benchmark
+
 <details>
-<summary><b>📚 Example - Long Document QA Benchmark</b></summary>
+<summary>Show more</summary>
 
 <br/>
 
 Benchmark the performance of long document question-answering with prefix caching.
 
-**Basic Long Document QA Test**
+### Basic Long Document QA Test
 
 ```bash
 python3 benchmarks/benchmark_long_document_qa_throughput.py \
@@ -504,7 +513,7 @@ python3 benchmarks/benchmark_long_document_qa_throughput.py \
   --repeat-count 5
 ```
 
-**Different Repeat Modes**
+### Different Repeat Modes
 
 ```bash
 # Random mode (default) - shuffle prompts randomly
@@ -537,14 +546,16 @@ python3 benchmarks/benchmark_long_document_qa_throughput.py \
 
 </details>
 
+## 🗂️ Example - Prefix Caching Benchmark
+
 <details>
-<summary><b>🗂️ Example - Prefix Caching Benchmark</b></summary>
+<summary>Show more</summary>
 
 <br/>
 
 Benchmark the efficiency of automatic prefix caching.
 
-**Fixed Prompt with Prefix Caching**
+### Fixed Prompt with Prefix Caching
 
 ```bash
 python3 benchmarks/benchmark_prefix_caching.py \
@@ -555,7 +566,7 @@ python3 benchmarks/benchmark_prefix_caching.py \
   --input-length-range 128:256
 ```
 
-**ShareGPT Dataset with Prefix Caching**
+### ShareGPT Dataset with Prefix Caching
 
 ```bash
 # download dataset
@@ -572,14 +583,16 @@ python3 benchmarks/benchmark_prefix_caching.py \
 
 </details>
 
+## ⚡ Example - Request Prioritization Benchmark
+
 <details>
-<summary><b>⚡ Example - Request Prioritization Benchmark</b></summary>
+<summary>Show more</summary>
 
 <br/>
 
 Benchmark the performance of request prioritization in vLLM.
 
-**Basic Prioritization Test**
+### Basic Prioritization Test
 
 ```bash
 python3 benchmarks/benchmark_prioritization.py \
@@ -590,7 +603,7 @@ python3 benchmarks/benchmark_prioritization.py \
   --scheduling-policy priority
 ```
 
-**Multiple Sequences per Prompt**
+### Multiple Sequences per Prompt
 
 ```bash
 python3 benchmarks/benchmark_prioritization.py \
diff --git a/benchmarks/auto_tune/README.md b/benchmarks/auto_tune/README.md
index c479ff1aa29c0..9aad51df6e003 100644
--- a/benchmarks/auto_tune/README.md
+++ b/benchmarks/auto_tune/README.md
@@ -3,6 +3,7 @@
 This script automates the process of finding the optimal server parameter combination (`max-num-seqs` and `max-num-batched-tokens`) to maximize throughput for a vLLM server. It also supports additional constraints such as E2E latency and prefix cache hit rate.
 
 ## Table of Contents
+
 - [Prerequisites](#prerequisites)
 - [Configuration](#configuration)
 - [How to Run](#how-to-run)
@@ -52,7 +53,7 @@ You must set the following variables at the top of the script before execution.
 1. **Configure**: Edit the script and set the variables in the [Configuration](#configuration) section.
 2. **Execute**: Run the script. Since the process can take a long time, it is highly recommended to use a terminal multiplexer like `tmux` or `screen` to prevent the script from stopping if your connection is lost.
 
-```
+```bash
 cd <FOLDER_OF_THIS_SCRIPT>
 bash auto_tune.sh
 ```
@@ -64,6 +65,7 @@ bash auto_tune.sh
 Here are a few examples of how to configure the script for different goals:
 
 ### 1. Maximize Throughput (No Latency Constraint)
+
 - **Goal**: Find the best `max-num-seqs` and `max-num-batched-tokens` to get the highest possible throughput for 1800 input tokens and 20 output tokens.
 - **Configuration**:
 
@@ -76,6 +78,7 @@ MAX_LATENCY_ALLOWED_MS=100000000000 # A very large number
 ```
 
 #### 2. Maximize Throughput with a Latency Requirement
+
 - **Goal**: Find the best server parameters when P99 end-to-end latency must be below 500ms.
 - **Configuration**:
 
@@ -88,6 +91,7 @@ MAX_LATENCY_ALLOWED_MS=500
 ```
 
 #### 3. Maximize Throughput with Prefix Caching and Latency Requirements
+
 - **Goal**: Find the best server parameters assuming a 60% prefix cache hit rate and a latency requirement of 500ms.
 - **Configuration**:
 
@@ -109,7 +113,7 @@ After the script finishes, you will find the results in a new, timestamped direc
 
 - **Final Result Summary**: A file named `result.txt` is created in the log directory. It contains a summary of each tested combination and concludes with the overall best parameters found.
 
-```
+```text
 # Example result.txt content
 hash:a1b2c3d4...
 max_num_seqs: 128, max_num_batched_tokens: 2048, request_rate: 10.0, e2el: 450.5, throughput: 9.8, goodput: 9.8
diff --git a/benchmarks/kernels/deepgemm/README.md b/benchmarks/kernels/deepgemm/README.md
index 917e814010f89..41e68e047be82 100644
--- a/benchmarks/kernels/deepgemm/README.md
+++ b/benchmarks/kernels/deepgemm/README.md
@@ -8,7 +8,7 @@ Currently this just includes dense GEMMs and only works on Hopper GPUs.
 
 You need to install vLLM in your usual fashion, then install DeepGEMM from source in its own directory:
 
-```
+```bash
 git clone --recursive https://github.com/deepseek-ai/DeepGEMM
 cd DeepGEMM
 python setup.py install
@@ -17,7 +17,7 @@ uv pip install -e .
 
 ## Usage
 
-```
+```console
 python benchmark_fp8_block_dense_gemm.py
 INFO 02-26 21:55:13 [__init__.py:207] Automatically detected platform cuda.
 ===== STARTING FP8 GEMM BENCHMARK =====
diff --git a/csrc/quantization/cutlass_w8a8/Epilogues.md b/csrc/quantization/cutlass_w8a8/Epilogues.md
index a30e1fdf3ac77..15a66913e97a3 100644
--- a/csrc/quantization/cutlass_w8a8/Epilogues.md
+++ b/csrc/quantization/cutlass_w8a8/Epilogues.md
@@ -86,6 +86,7 @@ D = s_a s_b \widehat A \widehat B
 ```
 
 Epilogue parameters:
+
 - `scale_a` is the scale for activations, can be per-tensor (scalar) or per-token (column-vector).
 - `scale_b` is the scale for weights, can be per-tensor (scalar) or per-channel (row-vector).
 
@@ -135,7 +136,7 @@ That is precomputed and stored in `azp_with_adj` as a row-vector.
 Epilogue parameters:
 
 - `scale_a` is the scale for activations, can be per-tensor (scalar) or per-token (column-vector).
-  - Generally this will be per-tensor as the zero-points are per-tensor.
+    - Generally this will be per-tensor as the zero-points are per-tensor.
 - `scale_b` is the scale for weights, can be per-tensor (scalar) or per-channel (row-vector).
 - `azp_with_adj` is the precomputed zero-point term ($` z_a J_a \widehat B `$), is per-channel (row-vector).
 - `bias` is the bias, is always per-channel (row-vector).
@@ -152,7 +153,7 @@ That means the zero-point term $` z_a J_a \widehat B `$ becomes an outer product
 Epilogue parameters:
 
 - `scale_a` is the scale for activations, can be per-tensor (scalar) or per-token (column-vector).
-  - Generally this will be per-token as the zero-points are per-token.
+    - Generally this will be per-token as the zero-points are per-token.
 - `scale_b` is the scale for weights, can be per-tensor (scalar) or per-channel (row-vector).
 - `azp_adj` is the precomputed zero-point adjustment term ($` \mathbf 1 \widehat B `$), is per-channel (row-vector).
 - `azp` is the zero-point (`z_a`), is per-token (column-vector).
diff --git a/docs/cli/README.md b/docs/cli/README.md
index dfb6051a8c8a6..b1371c82a4c4d 100644
--- a/docs/cli/README.md
+++ b/docs/cli/README.md
@@ -6,13 +6,13 @@ toc_depth: 4
 
 The vllm command-line tool is used to run and manage vLLM models. You can start by viewing the help message with:
 
-```
+```bash
 vllm --help
 ```
 
 Available Commands:
 
-```
+```bash
 vllm {chat,complete,serve,bench,collect-env,run-batch}
 ```
 
diff --git a/docs/configuration/tpu.md b/docs/configuration/tpu.md
index 005b7f78f4407..0ff0cdda380e9 100644
--- a/docs/configuration/tpu.md
+++ b/docs/configuration/tpu.md
@@ -40,6 +40,7 @@ Although the first compilation can take some time, for all subsequent server lau
 Use `VLLM_XLA_CACHE_PATH` environment variable to write to shareable storage for future deployed nodes (like when using autoscaling).
 
 #### Reducing compilation time
+
 This initial compilation time ranges significantly and is impacted by many of the arguments discussed in this optimization doc. Factors that influence the length of time to compile are things like model size and `--max-num-batch-tokens`. Other arguments you can tune are things like `VLLM_TPU_MOST_MODEL_LEN`.
 
 ### Optimize based on your data
@@ -71,12 +72,15 @@ The fewer tokens we pad, the less unnecessary computation TPU does, the better p
 
 However, you need to be careful to choose the padding gap. If the gap is too small, it means the number of buckets is large, leading to increased warmup (precompile) time and higher memory to store the compiled graph. Too many compilaed graphs may lead to HBM OOM. Conversely, an overly large gap yields no performance improvement compared to the default exponential padding.
 
-**If possible, use the precision that matches the chip’s hardware acceleration**
+#### Quantization
+
+If possible, use the precision that matches the chip’s hardware acceleration:
 
 - v5e has int4/int8 hardware acceleration in the MXU
 - v6e has int4/int8 hardware acceleration in the MXU
 
-Supported quantized formats and features in vLLM on TPU [Jul '25]
+Supported quantized formats and features in vLLM on TPU [Jul '25]:
+
 - INT8 W8A8
 - INT8 W8A16
 - FP8 KV cache
@@ -84,11 +88,13 @@ Supported quantized formats and features in vLLM on TPU [Jul '25]
 - [WIP] AWQ
 - [WIP] FP4 W4A8
 
-**Don't set TP to be less than the number of chips on a single-host deployment**
+#### Parallelization
+
+Don't set TP to be less than the number of chips on a single-host deployment.
 
 Although it’s common to do this with GPUs, don't try to fragment 2 or 8 different workloads across 8 chips on a single host. If you need 1 or 4 chips, just create an instance with 1 or 4 chips (these are partial-host machine types).
 
-### Tune your workloads!
+### Tune your workloads
 
 Although we try to have great default configs, we strongly recommend you check out the [vLLM auto-tuner](../../benchmarks/auto_tune/README.md) to optimize your workloads for your use case.
 
@@ -99,6 +105,7 @@ Although we try to have great default configs, we strongly recommend you check o
 The auto-tuner provides a profile of optimized configurations as its final step. However, interpreting this profile can be challenging for new users. We plan to expand this section in the future with more detailed guidance. In the meantime, you can learn how to collect a TPU profile using vLLM's native profiling tools [here](../examples/offline_inference/profiling_tpu.md). This profile can provide valuable insights into your workload's performance.
 
 #### SPMD
+
 More details to come.
 
 **Want us to cover something that isn't listed here? Open up an issue please and cite this doc. We'd love to hear your questions or tips.**
diff --git a/docs/contributing/ci/failures.md b/docs/contributing/ci/failures.md
index 573efb3b05f6e..d7e2dfbca8760 100644
--- a/docs/contributing/ci/failures.md
+++ b/docs/contributing/ci/failures.md
@@ -20,19 +20,19 @@ the failure?
 
 - **Use this title format:**
 
-    ```
+    ```text
     [CI Failure]: failing-test-job - regex/matching/failing:test
     ```
 
 - **For the environment field:**
 
-    ```
- Still failing on main as of commit abcdef123
+    ```text
+    Still failing on main as of commit abcdef123
     ```
 
 - **In the description, include failing tests:**
 
-    ```
+    ```text
     FAILED failing/test.py:failing_test1 - Failure description
     FAILED failing/test.py:failing_test2 - Failure description
     https://github.com/orgs/vllm-project/projects/20
diff --git a/docs/contributing/ci/update_pytorch_version.md b/docs/contributing/ci/update_pytorch_version.md
index 699d0531ac768..3a6026d450a67 100644
--- a/docs/contributing/ci/update_pytorch_version.md
+++ b/docs/contributing/ci/update_pytorch_version.md
@@ -106,6 +106,7 @@ releases (which would take too much time), they can be built from
 source to unblock the update process.
 
 ### FlashInfer
+
 Here is how to build and install it from source with `torch2.7.0+cu128` in vLLM [Dockerfile](https://github.com/vllm-project/vllm/blob/27bebcd89792d5c4b08af7a65095759526f2f9e1/docker/Dockerfile#L259-L271):
 
 ```bash
@@ -121,6 +122,7 @@ public location for immediate installation, such as [this FlashInfer wheel link]
 team if you want to get the package published there.
 
 ### xFormers
+
 Similar to FlashInfer, here is how to build and install xFormers from source:
 
 ```bash
@@ -138,7 +140,7 @@ uv pip install --system \
 
 ### causal-conv1d
 
-```
+```bash
 uv pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
 ```
 
diff --git a/docs/contributing/deprecation_policy.md b/docs/contributing/deprecation_policy.md
index ff69cbae08b23..904ef4ca058c0 100644
--- a/docs/contributing/deprecation_policy.md
+++ b/docs/contributing/deprecation_policy.md
@@ -31,7 +31,7 @@ Features that fall under this policy include (at a minimum) the following:
 The deprecation process consists of several clearly defined stages that span
 multiple Y releases:
 
-**1. Deprecated (Still On By Default)**
+### 1. Deprecated (Still On By Default)
 
 - **Action**: Feature is marked as deprecated.
 - **Timeline**: A removal version is explicitly stated in the deprecation
@@ -46,7 +46,7 @@ warning (e.g., "This will be removed in v0.10.0").
     - GitHub Issue (RFC) for feedback
     - Documentation and use of the `@typing_extensions.deprecated` decorator for Python APIs
 
-**2.Deprecated (Off By Default)**
+### 2. Deprecated (Off By Default)
 
 - **Action**: Feature is disabled by default, but can still be re-enabled via a
 CLI flag or environment variable. Feature throws an error when used without
@@ -55,7 +55,7 @@ re-enabling.
 while signaling imminent removal. Ensures any remaining usage is clearly
 surfaced and blocks silent breakage before full removal.
 
-**3. Removed**
+### 3. Removed
 
 - **Action**: Feature is completely removed from the codebase.
 - **Note**: Only features that have passed through the previous deprecation
diff --git a/docs/contributing/profiling.md b/docs/contributing/profiling.md
index 13c3bc2c7e031..7c18b464b576c 100644
--- a/docs/contributing/profiling.md
+++ b/docs/contributing/profiling.md
@@ -112,13 +112,13 @@ vllm bench serve \
 
 In practice, you should set the `--duration` argument to a large value. Whenever you want the server to stop profiling, run:
 
-```
+```bash
 nsys sessions list
 ```
 
 to get the session id in the form of `profile-XXXXX`, then run:
 
-```
+```bash
 nsys stop --session=profile-XXXXX
 ```
 
diff --git a/docs/contributing/vulnerability_management.md b/docs/contributing/vulnerability_management.md
index e20b10f8f7b32..847883f742974 100644
--- a/docs/contributing/vulnerability_management.md
+++ b/docs/contributing/vulnerability_management.md
@@ -32,9 +32,9 @@ We prefer to keep all vulnerability-related communication on the security report
 on GitHub. However, if you need to contact the VMT directly for an urgent issue,
 you may contact the following individuals:
 
-- Simon Mo - simon.mo@hey.com
-- Russell Bryant - rbryant@redhat.com
-- Huzaifa Sidhpurwala - huzaifas@redhat.com
+- Simon Mo - <simon.mo@hey.com>
+- Russell Bryant - <rbryant@redhat.com>
+- Huzaifa Sidhpurwala - <huzaifas@redhat.com>
 
 ## Slack Discussion
 
diff --git a/docs/deployment/frameworks/anything-llm.md b/docs/deployment/frameworks/anything-llm.md
index d6b28a358cc3d..e62a33b2085ca 100644
--- a/docs/deployment/frameworks/anything-llm.md
+++ b/docs/deployment/frameworks/anything-llm.md
@@ -19,9 +19,9 @@ vllm serve Qwen/Qwen1.5-32B-Chat-AWQ --max-model-len 4096
 - Download and install [Anything LLM desktop](https://anythingllm.com/desktop).
 
 - On the bottom left of open settings, AI Prooviders --> LLM:
-  - LLM Provider: Generic OpenAI
-  - Base URL: http://{vllm server host}:{vllm server port}/v1
-  - Chat Model Name: `Qwen/Qwen1.5-32B-Chat-AWQ`
+    - LLM Provider: Generic OpenAI
+    - Base URL: http://{vllm server host}:{vllm server port}/v1
+    - Chat Model Name: `Qwen/Qwen1.5-32B-Chat-AWQ`
 
 ![](../../assets/deployment/anything-llm-provider.png)
 
@@ -30,9 +30,9 @@ vllm serve Qwen/Qwen1.5-32B-Chat-AWQ --max-model-len 4096
 ![](../../assets/deployment/anything-llm-chat-without-doc.png)
 
 - Click the upload button:
-  - upload the doc
-  - select the doc and move to the workspace
-  - save and embed
+    - upload the doc
+    - select the doc and move to the workspace
+    - save and embed
 
 ![](../../assets/deployment/anything-llm-upload-doc.png)
 
diff --git a/docs/deployment/frameworks/chatbox.md b/docs/deployment/frameworks/chatbox.md
index 15f92ed1e34df..cbca6e6282fc6 100644
--- a/docs/deployment/frameworks/chatbox.md
+++ b/docs/deployment/frameworks/chatbox.md
@@ -19,11 +19,11 @@ vllm serve qwen/Qwen1.5-0.5B-Chat
 - Download and install [Chatbox desktop](https://chatboxai.app/en#download).
 
 - On the bottom left of settings, Add Custom Provider
-  - API Mode: `OpenAI API Compatible`
-  - Name: vllm
-  - API Host: `http://{vllm server host}:{vllm server port}/v1`
-  - API Path: `/chat/completions`
-  - Model: `qwen/Qwen1.5-0.5B-Chat`
+    - API Mode: `OpenAI API Compatible`
+    - Name: vllm
+    - API Host: `http://{vllm server host}:{vllm server port}/v1`
+    - API Path: `/chat/completions`
+    - Model: `qwen/Qwen1.5-0.5B-Chat`
 
 ![](../../assets/deployment/chatbox-settings.png)
 
diff --git a/docs/deployment/frameworks/dify.md b/docs/deployment/frameworks/dify.md
index a3063194fb513..35f02c33cb02b 100644
--- a/docs/deployment/frameworks/dify.md
+++ b/docs/deployment/frameworks/dify.md
@@ -34,11 +34,11 @@ docker compose up -d
 - In the top-right user menu (under the profile icon), go to Settings, then click `Model Provider`, and locate the `vLLM` provider to install it.
 
 - Fill in the model provider details as follows:
-  - **Model Type**: `LLM`
-  - **Model Name**: `Qwen/Qwen1.5-7B-Chat`
-  - **API Endpoint URL**: `http://{vllm_server_host}:{vllm_server_port}/v1`
-  - **Model Name for API Endpoint**: `Qwen/Qwen1.5-7B-Chat`
-  - **Completion Mode**: `Completion`
+    - **Model Type**: `LLM`
+    - **Model Name**: `Qwen/Qwen1.5-7B-Chat`
+    - **API Endpoint URL**: `http://{vllm_server_host}:{vllm_server_port}/v1`
+    - **Model Name for API Endpoint**: `Qwen/Qwen1.5-7B-Chat`
+    - **Completion Mode**: `Completion`
 
 ![](../../assets/deployment/dify-settings.png)
 
diff --git a/docs/deployment/frameworks/haystack.md b/docs/deployment/frameworks/haystack.md
index a18d68142cabb..70b4b48d4543e 100644
--- a/docs/deployment/frameworks/haystack.md
+++ b/docs/deployment/frameworks/haystack.md
@@ -1,7 +1,5 @@
 # Haystack
 
-# Haystack
-
 [Haystack](https://github.com/deepset-ai/haystack) is an end-to-end LLM framework that allows you to build applications powered by LLMs, Transformer models, vector search and more. Whether you want to perform retrieval-augmented generation (RAG), document search, question answering or answer generation, Haystack can orchestrate state-of-the-art embedding models and LLMs into pipelines to build end-to-end NLP applications and solve your use case.
 
 It allows you to deploy a large language model (LLM) server with vLLM as the backend, which exposes OpenAI-compatible endpoints.
diff --git a/docs/deployment/frameworks/retrieval_augmented_generation.md b/docs/deployment/frameworks/retrieval_augmented_generation.md
index 96dd99e7118b6..d5f2ec302b6cd 100644
--- a/docs/deployment/frameworks/retrieval_augmented_generation.md
+++ b/docs/deployment/frameworks/retrieval_augmented_generation.md
@@ -3,6 +3,7 @@
 [Retrieval-augmented generation (RAG)](https://en.wikipedia.org/wiki/Retrieval-augmented_generation) is a technique that enables generative artificial intelligence (Gen AI) models to retrieve and incorporate new information. It modifies interactions with a large language model (LLM) so that the model responds to user queries with reference to a specified set of documents, using this information to supplement information from its pre-existing training data. This allows LLMs to use domain-specific and/or updated information. Use cases include providing chatbot access to internal company data or generating responses based on authoritative sources.
 
 Here are the integrations:
+
 - vLLM + [langchain](https://github.com/langchain-ai/langchain) + [milvus](https://github.com/milvus-io/milvus)
 - vLLM + [llamaindex](https://github.com/run-llama/llama_index) + [milvus](https://github.com/milvus-io/milvus)
 
diff --git a/docs/deployment/integrations/production-stack.md b/docs/deployment/integrations/production-stack.md
index 497f9f1a92a5d..fae392589c060 100644
--- a/docs/deployment/integrations/production-stack.md
+++ b/docs/deployment/integrations/production-stack.md
@@ -140,11 +140,12 @@ The core vLLM production stack configuration is managed with YAML. Here is the e
     ```
 
 In this YAML configuration:
+
 * **`modelSpec`** includes:
-  * `name`: A nickname that you prefer to call the model.
-  * `repository`: Docker repository of vLLM.
-  * `tag`: Docker image tag.
-  * `modelURL`: The LLM model that you want to use.
+    * `name`: A nickname that you prefer to call the model.
+    * `repository`: Docker repository of vLLM.
+    * `tag`: Docker image tag.
+    * `modelURL`: The LLM model that you want to use.
 * **`replicaCount`**: Number of replicas.
 * **`requestCPU` and `requestMemory`**: Specifies the CPU and memory resource requests for the pod.
 * **`requestGPU`**: Specifies the number of GPUs required.
diff --git a/docs/deployment/k8s.md b/docs/deployment/k8s.md
index f244b0858eb6e..cad801a4312cc 100644
--- a/docs/deployment/k8s.md
+++ b/docs/deployment/k8s.md
@@ -5,7 +5,7 @@ Deploying vLLM on Kubernetes is a scalable and efficient way to serve machine le
 - [Deployment with CPUs](#deployment-with-cpus)
 - [Deployment with GPUs](#deployment-with-gpus)
 - [Troubleshooting](#troubleshooting)
-  - [Startup Probe or Readiness Probe Failure, container log contains "KeyboardInterrupt: terminated"](#startup-probe-or-readiness-probe-failure-container-log-contains-keyboardinterrupt-terminated)
+    - [Startup Probe or Readiness Probe Failure, container log contains "KeyboardInterrupt: terminated"](#startup-probe-or-readiness-probe-failure-container-log-contains-keyboardinterrupt-terminated)
 - [Conclusion](#conclusion)
 
 Alternatively, you can deploy vLLM to Kubernetes using any of the following:
diff --git a/docs/design/metrics.md b/docs/design/metrics.md
index 52cd320dd4e11..ba34c7dca0017 100644
--- a/docs/design/metrics.md
+++ b/docs/design/metrics.md
@@ -361,7 +361,7 @@ instances in Prometheus.
 
 We use this concept for the `vllm:cache_config_info` metric:
 
-```
+```text
 # HELP vllm:cache_config_info Information of the LLMEngine CacheConfig
 # TYPE vllm:cache_config_info gauge
 vllm:cache_config_info{block_size="16",cache_dtype="auto",calculate_kv_scales="False",cpu_offload_gb="0",enable_prefix_caching="False",gpu_memory_utilization="0.9",...} 1.0
@@ -686,7 +686,7 @@ documentation for this option states:
 The metrics were added by <gh-pr:7089> and who up in an OpenTelemetry trace
 as:
 
-```
+```text
 -> gen_ai.latency.time_in_scheduler: Double(0.017550230026245117)
 -> gen_ai.latency.time_in_model_forward: Double(3.151565277099609)
 -> gen_ai.latency.time_in_model_execute: Double(3.6468167304992676)
diff --git a/docs/design/p2p_nccl_connector.md b/docs/design/p2p_nccl_connector.md
index 082dff15ef2c8..94af8bedd24d2 100644
--- a/docs/design/p2p_nccl_connector.md
+++ b/docs/design/p2p_nccl_connector.md
@@ -5,6 +5,7 @@ An implementation of xPyD with dynamic scaling based on point-to-point communica
 ## Detailed Design
 
 ### Overall Process
+
 As shown in Figure 1, the overall process of this **PD disaggregation** solution is described through a request flow:
 
 1. The client sends an HTTP request to the Proxy/Router's `/v1/completions` interface.
@@ -23,7 +24,7 @@ A simple HTTP service acts as the entry point for client requests and starts a b
 
 The Proxy/Router is responsible for selecting 1P1D based on the characteristics of the client request, such as the prompt, and generating a corresponding `request_id`, for example:
 
-```
+```text
 cmpl-___prefill_addr_10.0.1.2:21001___decode_addr_10.0.1.3:22001_93923d63113b4b338973f24d19d4bf11-0
 ```
 
@@ -70,6 +71,7 @@ pip install "vllm>=0.9.2"
 ## Run xPyD
 
 ### Instructions
+
 - The following examples are run on an A800 (80GB) device, using the Meta-Llama-3.1-8B-Instruct model.
 - Pay attention to the setting of the `kv_buffer_size` (in bytes). The empirical value is 10% of the GPU memory size. This is related to the kvcache size. If it is too small, the GPU memory buffer for temporarily storing the received kvcache will overflow, causing the kvcache to be stored in the tensor memory pool, which increases latency. If it is too large, the kvcache available for inference will be reduced, leading to a smaller batch size and decreased throughput.
 - For Prefill instances, when using non-GET mode, the `kv_buffer_size` can be set to 1, as Prefill currently does not need to receive kvcache. However, when using GET mode, a larger `kv_buffer_size` is required because it needs to store the kvcache sent to the D instance.
diff --git a/docs/design/prefix_caching.md b/docs/design/prefix_caching.md
index 2d3c8412894a6..fcc014cf85164 100644
--- a/docs/design/prefix_caching.md
+++ b/docs/design/prefix_caching.md
@@ -18,10 +18,12 @@ In the example above, the KV cache in the first block can be uniquely identified
 * Block tokens: A tuple of tokens in this block. The reason to include the exact tokens is to reduce potential hash value collision.
 * Extra hashes: Other values required to make this block unique, such as LoRA IDs, multi-modality input hashes (see the example below), and cache salts to isolate caches in multi-tenant environments.
 
-> **Note 1:** We only cache full blocks.
+!!! note "Note 1"
+    We only cache full blocks.
 
-> **Note 2:** The above hash key structure is not 100% collision free. Theoretically it’s still possible for the different prefix tokens to have the same hash value. To avoid any hash collisions **in a multi-tenant setup, we advise to use SHA256** as hash function instead of the default builtin hash.
-SHA256 is supported since vLLM v0.8.3 and must be enabled with a command line argument. It comes with a performance impact of about 100-200ns per token (~6ms for 50k tokens of context).
+!!! note "Note 2"
+    The above hash key structure is not 100% collision free. Theoretically it’s still possible for the different prefix tokens to have the same hash value. To avoid any hash collisions **in a multi-tenant setup, we advise to use SHA256** as hash function instead of the default builtin hash.
+    SHA256 is supported since vLLM v0.8.3 and must be enabled with a command line argument. It comes with a performance impact of about 100-200ns per token (~6ms for 50k tokens of context).
 
 **A hashing example with multi-modality inputs**  
 In this example, we illustrate how prefix caching works with multi-modality inputs (e.g., images). Assuming we have a request with the following messages:
@@ -92,7 +94,8 @@ To improve privacy in shared environments, vLLM supports isolating prefix cache
 
 With this setup, cache sharing is limited to users or requests that explicitly agree on a common salt, enabling cache reuse within a trust group while isolating others.
 
-> **Note:** Cache isolation is not supported in engine V0.
+!!! note
+    Cache isolation is not supported in engine V0.
 
 ## Data Structure
 
diff --git a/docs/design/torch_compile.md b/docs/design/torch_compile.md
index ea5d8ac212f7a..2d76e7f3adc5c 100644
--- a/docs/design/torch_compile.md
+++ b/docs/design/torch_compile.md
@@ -8,7 +8,7 @@ Throughout the example, we will run a common Llama model using v1, and turn on d
 
 In the very verbose logs, we can see:
 
-```
+```console
 INFO 03-07 03:06:55 [backends.py:409] Using cache directory: ~/.cache/vllm/torch_compile_cache/1517964802/rank_0_0 for vLLM's torch.compile
 ```
 
@@ -75,7 +75,7 @@ Every submodule can be identified by its index, and will be processed individual
 
 In the very verbose logs, we can also see:
 
-```
+```console
 DEBUG 03-07 03:52:37 [backends.py:134] store the 0-th graph for shape None from inductor via handle ('fpegyiq3v3wzjzphd45wkflpabggdbjpylgr7tta4hj6uplstsiw', '~/.cache/vllm/torch_compile_cache/1517964802/rank_0_0/inductor_cache/iw/ciwzrk3ittdqatuzwonnajywvno3llvjcs2vfdldzwzozn3zi3iy.py')
 DEBUG 03-07 03:52:39 [backends.py:134] store the 1-th graph for shape None from inductor via handle ('f7fmlodmf3h3by5iiu2c4zarwoxbg4eytwr3ujdd2jphl4pospfd', '~/.cache/vllm/torch_compile_cache/1517964802/rank_0_0/inductor_cache/ly/clyfzxldfsj7ehaluis2mca2omqka4r7mgcedlf6xfjh645nw6k2.py')
 ...
@@ -93,7 +93,7 @@ One more detail: you can see that the 1-th graph and the 15-th graph have the sa
 
 If we already have the cache directory (e.g. run the same code for the second time), we will see the following logs:
 
-```
+```console
 DEBUG 03-07 04:00:45 [backends.py:86] Directly load the 0-th graph for shape None from inductor via handle ('fpegyiq3v3wzjzphd45wkflpabggdbjpylgr7tta4hj6uplstsiw', '~/.cache/vllm/torch_compile_cache/1517964802/rank_0_0/inductor_cache/iw/ciwzrk3ittdqatuzwonnajywvno3llvjcs2vfdldzwzozn3zi3iy.py')
 ```
 
diff --git a/docs/features/compatibility_matrix.md b/docs/features/compatibility_matrix.md
index 259a447984cb0..930265b8f9840 100644
--- a/docs/features/compatibility_matrix.md
+++ b/docs/features/compatibility_matrix.md
@@ -36,9 +36,9 @@ th:not(:first-child) {
 
 | Feature | [CP][chunked-prefill] | [APC](automatic_prefix_caching.md) | [LoRA](lora.md) | [SD](spec_decode.md) | CUDA graph | [pooling](../models/pooling_models.md) | <abbr title="Encoder-Decoder Models">enc-dec</abbr> | <abbr title="Logprobs">logP</abbr> | <abbr title="Prompt Logprobs">prmpt logP</abbr> | <abbr title="Async Output Processing">async output</abbr> | multi-step | <abbr title="Multimodal Inputs">mm</abbr> | best-of | beam-search |
 |---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
-| [CP][chunked-prefill] | ✅ | | | | | | | | | | | | | | |
-| [APC](automatic_prefix_caching.md) | ✅ | ✅ | | | | | | | | | | | | | |
-| [LoRA](lora.md) | ✅ | ✅ | ✅ | | | | | | | | | | | | |
+| [CP][chunked-prefill] | ✅ | | | | | | | | | | | | | |
+| [APC](automatic_prefix_caching.md) | ✅ | ✅ | | | | | | | | | | | | |
+| [LoRA](lora.md) | ✅ | ✅ | ✅ | | | | | | | | | | | |
 | [SD](spec_decode.md) | ✅ | ✅ | ❌ | ✅ | | | | | | | | | | |
 | CUDA graph | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | | |
 | [pooling](../models/pooling_models.md) | ✅\* | ✅\* | ✅ | ❌ | ✅ | ✅ | | | | | | | | |
diff --git a/docs/features/lora.md b/docs/features/lora.md
index ea1b495138c1b..a4e05dae11c2e 100644
--- a/docs/features/lora.md
+++ b/docs/features/lora.md
@@ -119,6 +119,7 @@ export VLLM_ALLOW_RUNTIME_LORA_UPDATING=True
 ```
 
 ### Using API Endpoints
+
 Loading a LoRA Adapter:
 
 To dynamically load a LoRA adapter, send a POST request to the `/v1/load_lora_adapter` endpoint with the necessary
@@ -156,6 +157,7 @@ curl -X POST http://localhost:8000/v1/unload_lora_adapter \
 ```
 
 ### Using Plugins
+
 Alternatively, you can use the LoRAResolver plugin to dynamically load LoRA adapters. LoRAResolver plugins enable you to load LoRA adapters from both local and remote sources such as local file system and S3. On every request, when there's a new model name that hasn't been loaded yet, the LoRAResolver will try to resolve and load the corresponding LoRA adapter.
 
 You can set up multiple LoRAResolver plugins if you want to load LoRA adapters from different sources. For example, you might have one resolver for local files and another for S3 storage. vLLM will load the first LoRA adapter that it finds.
diff --git a/docs/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md
index d4c8852206bba..b8677f11a1d3c 100644
--- a/docs/features/multimodal_inputs.md
+++ b/docs/features/multimodal_inputs.md
@@ -588,7 +588,9 @@ Full example: <gh-file:examples/online_serving/openai_chat_completion_client_for
 
 To input pre-computed embeddings belonging to a data type (i.e. image, video, or audio) directly to the language model,
 pass a tensor of shape to the corresponding field of the multi-modal dictionary.
+
 #### Image Embedding Inputs
+
 For image embeddings, you can pass the base64-encoded tensor to the `image_embeds` field.
 The following example demonstrates how to pass image embeddings to the OpenAI server:
 
diff --git a/docs/features/quantization/auto_round.md b/docs/features/quantization/auto_round.md
index 2dfd847bb7d9a..ac766d5e29228 100644
--- a/docs/features/quantization/auto_round.md
+++ b/docs/features/quantization/auto_round.md
@@ -97,7 +97,7 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
-# Acknowledgement
+## Acknowledgement
 
 Special thanks to open-source low precision libraries such as AutoGPTQ, AutoAWQ, GPTQModel, Triton, Marlin, and
 ExLLaMAV2 for providing low-precision CUDA kernels, which are leveraged in AutoRound.
diff --git a/docs/features/quantization/int4.md b/docs/features/quantization/int4.md
index 1df32a11ed9db..127e403989944 100644
--- a/docs/features/quantization/int4.md
+++ b/docs/features/quantization/int4.md
@@ -134,8 +134,8 @@ lm_eval --model vllm \
 - Employ the chat template or instruction template that the model was trained with
 - If you've fine-tuned a model, consider using a sample of your training data for calibration
 - Tune key hyperparameters to the quantization algorithm:
-  - `dampening_frac` sets how much influence the GPTQ algorithm has. Lower values can improve accuracy, but can lead to numerical instabilities that cause the algorithm to fail.
-  - `actorder` sets the activation ordering. When compressing the weights of a layer weight, the order in which channels are quantized matters. Setting `actorder="weight"` can improve accuracy without added latency.
+    - `dampening_frac` sets how much influence the GPTQ algorithm has. Lower values can improve accuracy, but can lead to numerical instabilities that cause the algorithm to fail.
+    - `actorder` sets the activation ordering. When compressing the weights of a layer weight, the order in which channels are quantized matters. Setting `actorder="weight"` can improve accuracy without added latency.
 
 The following is an example of an expanded quantization recipe you can tune to your own use case:
 
diff --git a/docs/features/quantization/quantized_kvcache.md b/docs/features/quantization/quantized_kvcache.md
index c54ec43658a43..b2b417309e92b 100644
--- a/docs/features/quantization/quantized_kvcache.md
+++ b/docs/features/quantization/quantized_kvcache.md
@@ -50,6 +50,7 @@ Here is an example of how to enable FP8 quantization:
     ```
 
 The `kv_cache_dtype` argument specifies the data type for KV cache storage:
+
 - `"auto"`: Uses the model's default "unquantized" data type
 - `"fp8"` or `"fp8_e4m3"`: Supported on CUDA 11.8+ and ROCm (AMD GPU)
 - `"fp8_e5m2"`: Supported on CUDA 11.8+
diff --git a/docs/features/quantization/quark.md b/docs/features/quantization/quark.md
index 5abfae35eeec4..e8ed2155375d4 100644
--- a/docs/features/quantization/quark.md
+++ b/docs/features/quantization/quark.md
@@ -213,6 +213,7 @@ lm_eval --model vllm \
 ```
 
 ## Quark Quantization Script
+
 In addition to the example of Python API above, Quark also offers a
 [quantization script](https://quark.docs.amd.com/latest/pytorch/example_quark_torch_llm_ptq.html)
 to quantize large language models more conveniently. It supports quantizing models with variety
diff --git a/docs/features/quantization/torchao.md b/docs/features/quantization/torchao.md
index ab6802177048b..6932445997012 100644
--- a/docs/features/quantization/torchao.md
+++ b/docs/features/quantization/torchao.md
@@ -13,6 +13,7 @@ pip install \
 ```
 
 ## Quantizing HuggingFace Models
+
 You can quantize your own huggingface model with torchao, e.g. [transformers](https://huggingface.co/docs/transformers/main/en/quantization/torchao) and [diffusers](https://huggingface.co/docs/diffusers/en/quantization/torchao), and save the checkpoint to huggingface hub like [this](https://huggingface.co/jerryzh168/llama3-8b-int8wo) with the following example code:
 
 ??? code
diff --git a/docs/getting_started/installation/cpu.md b/docs/getting_started/installation/cpu.md
index 2d2598da943c7..7a34d47d8e494 100644
--- a/docs/getting_started/installation/cpu.md
+++ b/docs/getting_started/installation/cpu.md
@@ -164,7 +164,7 @@ Note, it is recommended to manually reserve 1 CPU for vLLM front-end process whe
 
 ### How to decide `VLLM_CPU_KVCACHE_SPACE`?
 
-  - This value is 4GB by default. Larger space can support more concurrent requests, longer context length. However, users should take care of memory capacity of each NUMA node. The memory usage of each TP rank is the sum of `weight shard size` and `VLLM_CPU_KVCACHE_SPACE`, if it exceeds the capacity of a single NUMA node, the TP worker will be killed with `exitcode 9` due to out-of-memory.
+This value is 4GB by default. Larger space can support more concurrent requests, longer context length. However, users should take care of memory capacity of each NUMA node. The memory usage of each TP rank is the sum of `weight shard size` and `VLLM_CPU_KVCACHE_SPACE`, if it exceeds the capacity of a single NUMA node, the TP worker will be killed with `exitcode 9` due to out-of-memory.
 
 ### How to do performance tuning for vLLM CPU?
 
@@ -183,13 +183,13 @@ vLLM CPU supports tensor parallel (TP) and pipeline parallel (PP) to leverage mu
 
 ### Which quantization configs does vLLM CPU support?
 
-  - vLLM CPU supports quantizations:
+- vLLM CPU supports quantizations:
     - AWQ (x86 only)
     - GPTQ (x86 only)
     - compressed-tensor INT8 W8A8 (x86, s390x)
 
 ### (x86 only) What is the purpose of `VLLM_CPU_MOE_PREPACK` and `VLLM_CPU_SGL_KERNEL`?
 
-  - Both of them requires `amx` CPU flag.
+- Both of them require the `amx` CPU flag.
     - `VLLM_CPU_MOE_PREPACK` can provides better performance for MoE models
     - `VLLM_CPU_SGL_KERNEL` can provides better performance for MoE models and small-batch scenarios.
diff --git a/docs/getting_started/installation/intel_gaudi.md b/docs/getting_started/installation/intel_gaudi.md
index 0be0d02d0679c..61b2b02aa10ba 100644
--- a/docs/getting_started/installation/intel_gaudi.md
+++ b/docs/getting_started/installation/intel_gaudi.md
@@ -339,13 +339,13 @@ Each described step is logged by vLLM server, as follows (negative values corres
 
 - `VLLM_{phase}_{dim}_BUCKET_{param}` - collection of 12 environment variables configuring ranges of bucketing mechanism
 
-    * `{phase}` is either `PROMPT` or `DECODE`
+    - `{phase}` is either `PROMPT` or `DECODE`
 
-    * `{dim}` is either `BS`, `SEQ` or `BLOCK`
+    - `{dim}` is either `BS`, `SEQ` or `BLOCK`
 
-    * `{param}` is either `MIN`, `STEP` or `MAX`
+    - `{param}` is either `MIN`, `STEP` or `MAX`
 
-    * Default values:
+    - Default values:
 
 | `{phase}` | Parameter | Env Variable | Value Expression |
 |-----------|-----------|--------------|------------------|
diff --git a/docs/models/hardware_supported_models/tpu.md b/docs/models/hardware_supported_models/tpu.md
index da03a3b3160ad..7b0a5ba6e72da 100644
--- a/docs/models/hardware_supported_models/tpu.md
+++ b/docs/models/hardware_supported_models/tpu.md
@@ -1,7 +1,8 @@
 # TPU
 
-# TPU Supported Models
-## Text-only Language Models
+## Supported Models
+
+### Text-only Language Models
 
 | Model                                               | Architecture                   | Supported |
 |-----------------------------------------------------|--------------------------------|-----------|
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 763dd8fd50451..e2172051cd186 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -45,10 +45,10 @@ If a model is neither supported natively by vLLM or Transformers, it can still b
 For a model to be compatible with the Transformers backend for vLLM it must:
 
 - be a Transformers compatible custom model (see [Transformers - Customizing models](https://huggingface.co/docs/transformers/en/custom_models)):
-    * The model directory must have the correct structure (e.g. `config.json` is present).
-    * `config.json` must contain `auto_map.AutoModel`.
+    - The model directory must have the correct structure (e.g. `config.json` is present).
+    - `config.json` must contain `auto_map.AutoModel`.
 - be a Transformers backend for vLLM compatible model (see [writing-custom-models][writing-custom-models]):
-    * Customisation should be done in the base model (e.g. in `MyModel`, not `MyModelForCausalLM`).
+    - Customisation should be done in the base model (e.g. in `MyModel`, not `MyModelForCausalLM`).
 
 If the compatible model is:
 
@@ -134,10 +134,10 @@ class MyConfig(PretrainedConfig):
 
 - `base_model_tp_plan` is a `dict` that maps fully qualified layer name patterns to tensor parallel styles (currently only `"colwise"` and `"rowwise"` are supported).
 - `base_model_pp_plan` is a `dict` that maps direct child layer names to `tuple`s of `list`s of `str`s:
-    * You only need to do this for layers which are not present on all pipeline stages
-    * vLLM assumes that there will be only one `nn.ModuleList`, which is distributed across the pipeline stages
-    * The `list` in the first element of the `tuple` contains the names of the input arguments
-    * The `list` in the last element of the `tuple` contains the names of the variables the layer outputs to in your modeling code
+    - You only need to do this for layers which are not present on all pipeline stages
+    - vLLM assumes that there will be only one `nn.ModuleList`, which is distributed across the pipeline stages
+    - The `list` in the first element of the `tuple` contains the names of the input arguments
+    - The `list` in the last element of the `tuple` contains the names of the variables the layer outputs to in your modeling code
 
 ## Loading a Model
 
diff --git a/docs/serving/distributed_serving.md b/docs/serving/distributed_serving.md
index 4f111115f3073..9304976572788 100644
--- a/docs/serving/distributed_serving.md
+++ b/docs/serving/distributed_serving.md
@@ -99,7 +99,7 @@ From any node, enter a container and run `ray status` and `ray list nodes` to ve
 ### Running vLLM on a Ray cluster
 
 !!! tip
-     If Ray is running inside containers, run the commands in the remainder of this guide _inside the containers_, not on the host. To open a shell inside a container, connect to a node and use `docker exec -it <container_name> /bin/bash`.
+    If Ray is running inside containers, run the commands in the remainder of this guide *inside the containers*, not on the host. To open a shell inside a container, connect to a node and use `docker exec -it <container_name> /bin/bash`.
 
 Once a Ray cluster is running, use vLLM as you would in a single-node setting. All resources across the Ray cluster are visible to vLLM, so a single `vllm` command on a single node is sufficient.
 
diff --git a/docs/serving/expert_parallel_deployment.md b/docs/serving/expert_parallel_deployment.md
index d79b6fc590189..280b3322b11c3 100644
--- a/docs/serving/expert_parallel_deployment.md
+++ b/docs/serving/expert_parallel_deployment.md
@@ -31,11 +31,12 @@ vLLM provides three communication backends for EP:
 
 Enable EP by setting the `--enable-expert-parallel` flag. The EP size is automatically calculated as:
 
-```
+```text
 EP_SIZE = TP_SIZE × DP_SIZE
 ```
 
 Where:
+
 - `TP_SIZE`: Tensor parallel size (always 1 for now)
 - `DP_SIZE`: Data parallel size
 - `EP_SIZE`: Expert parallel size (computed automatically)
diff --git a/docs/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md
index 4eb2ea2731817..dfed15d4ace97 100644
--- a/docs/serving/openai_compatible_server.md
+++ b/docs/serving/openai_compatible_server.md
@@ -206,6 +206,7 @@ you can use the [official OpenAI Python client](https://github.com/openai/openai
 We support both [Vision](https://platform.openai.com/docs/guides/vision)- and
 [Audio](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in)-related parameters;
 see our [Multimodal Inputs](../features/multimodal_inputs.md) guide for more information.
+
 - *Note: `image_url.detail` parameter is not supported.*
 
 Code example: <gh-file:examples/online_serving/openai_chat_completion_client.py>
diff --git a/docs/usage/security.md b/docs/usage/security.md
index 76140434dcb36..d54e2bb37ec07 100644
--- a/docs/usage/security.md
+++ b/docs/usage/security.md
@@ -13,15 +13,18 @@ All communications between nodes in a multi-node vLLM deployment are **insecure
 The following options control inter-node communications in vLLM:
 
 #### 1. **Environment Variables:**
-   - `VLLM_HOST_IP`: Sets the IP address for vLLM processes to communicate on
+
+- `VLLM_HOST_IP`: Sets the IP address for vLLM processes to communicate on
 
 #### 2. **KV Cache Transfer Configuration:**
-   - `--kv-ip`: The IP address for KV cache transfer communications (default: 127.0.0.1)
-   - `--kv-port`: The port for KV cache transfer communications (default: 14579)
+
+- `--kv-ip`: The IP address for KV cache transfer communications (default: 127.0.0.1)
+- `--kv-port`: The port for KV cache transfer communications (default: 14579)
 
 #### 3. **Data Parallel Configuration:**
-   - `data_parallel_master_ip`: IP of the data parallel master (default: 127.0.0.1)
-   - `data_parallel_master_port`: Port of the data parallel master (default: 29500)
+
+- `data_parallel_master_ip`: IP of the data parallel master (default: 127.0.0.1)
+- `data_parallel_master_port`: Port of the data parallel master (default: 29500)
 
 ### Notes on PyTorch Distributed
 
@@ -41,18 +44,21 @@ Key points from the PyTorch security guide:
 ### Security Recommendations
 
 #### 1. **Network Isolation:**
-   - Deploy vLLM nodes on a dedicated, isolated network
-   - Use network segmentation to prevent unauthorized access
-   - Implement appropriate firewall rules
+
+- Deploy vLLM nodes on a dedicated, isolated network
+- Use network segmentation to prevent unauthorized access
+- Implement appropriate firewall rules
 
 #### 2. **Configuration Best Practices:**
-   - Always set `VLLM_HOST_IP` to a specific IP address rather than using defaults
-   - Configure firewalls to only allow necessary ports between nodes
+
+- Always set `VLLM_HOST_IP` to a specific IP address rather than using defaults
+- Configure firewalls to only allow necessary ports between nodes
 
 #### 3. **Access Control:**
-   - Restrict physical and network access to the deployment environment
-   - Implement proper authentication and authorization for management interfaces
-   - Follow the principle of least privilege for all system components
+
+- Restrict physical and network access to the deployment environment
+- Implement proper authentication and authorization for management interfaces
+- Follow the principle of least privilege for all system components
 
 ## Security and Firewalls: Protecting Exposed vLLM Systems
 
diff --git a/docs/usage/v1_guide.md b/docs/usage/v1_guide.md
index 498ff3da0ca31..38399c6633bdb 100644
--- a/docs/usage/v1_guide.md
+++ b/docs/usage/v1_guide.md
@@ -148,7 +148,7 @@ are not yet supported.
 vLLM V1 supports logprobs and prompt logprobs. However, there are some important semantic
 differences compared to V0:
 
-**Logprobs Calculation**
+##### Logprobs Calculation
 
 Logprobs in V1 are now returned immediately once computed from the model’s raw output (i.e.
 before applying any logits post-processing such as temperature scaling or penalty
@@ -157,7 +157,7 @@ probabilities used during sampling.
 
 Support for logprobs with post-sampling adjustments is in progress and will be added in future updates.
 
-**Prompt Logprobs with Prefix Caching**
+##### Prompt Logprobs with Prefix Caching
 
 Currently prompt logprobs are only supported when prefix caching is turned off via `--no-enable-prefix-caching`. In a future release, prompt logprobs will be compatible with prefix caching, but a recomputation will be triggered to recover the full prompt logprobs even upon a prefix cache hit. See details in [RFC #13414](gh-issue:13414).
 
@@ -165,7 +165,7 @@ Currently prompt logprobs are only supported when prefix caching is turned off v
 
 As part of the major architectural rework in vLLM V1, several legacy features have been deprecated.
 
-**Sampling features**
+##### Sampling features
 
 - **best_of**: This feature has been deprecated due to limited usage. See details at [RFC #13361](gh-issue:13361).
 - **Per-Request Logits Processors**: In V0, users could pass custom
@@ -173,11 +173,11 @@ As part of the major architectural rework in vLLM V1, several legacy features ha
   feature has been deprecated. Instead, the design is moving toward supporting **global logits
   processors**, a feature the team is actively working on for future releases. See details at [RFC #13360](gh-pr:13360).
 
-**KV Cache features**
+##### KV Cache features
 
 - **GPU <> CPU KV Cache Swapping**: with the new simplified core architecture, vLLM V1 no longer requires KV cache swapping
 to handle request preemptions.
 
-**Structured Output features**
+##### Structured Output features
 
 - **Request-level Structured Output Backend**: Deprecated, alternative backends (outlines, guidance) with fallbacks is supported now.
diff --git a/examples/offline_inference/disaggregated-prefill-v1/README.md b/examples/offline_inference/disaggregated-prefill-v1/README.md
index 9cbdb19820f56..abf6883f8d3ef 100644
--- a/examples/offline_inference/disaggregated-prefill-v1/README.md
+++ b/examples/offline_inference/disaggregated-prefill-v1/README.md
@@ -5,6 +5,6 @@ This example contains scripts that demonstrate disaggregated prefill in the offl
 ## Files
 
 - `run.sh` - A helper script that will run `prefill_example.py` and `decode_example.py` sequentially.
-  - Make sure you are in the `examples/offline_inference/disaggregated-prefill-v1` directory before running `run.sh`.
+    - Make sure you are in the `examples/offline_inference/disaggregated-prefill-v1` directory before running `run.sh`.
 - `prefill_example.py` - A script which performs prefill only, saving the KV state to the `local_storage` directory and the prompts to `output.txt`.
 - `decode_example.py` - A script which performs decode only, loading the KV state from the `local_storage` directory and the prompts from `output.txt`.
diff --git a/examples/offline_inference/openai_batch/README.md b/examples/offline_inference/openai_batch/README.md
index 631fde91fcd08..3c6f6c7a6c588 100644
--- a/examples/offline_inference/openai_batch/README.md
+++ b/examples/offline_inference/openai_batch/README.md
@@ -19,9 +19,9 @@ We currently support `/v1/chat/completions`, `/v1/embeddings`, and `/v1/score` e
 ## Pre-requisites
 
 * The examples in this document use `meta-llama/Meta-Llama-3-8B-Instruct`.
-  - Create a [user access token](https://huggingface.co/docs/hub/en/security-tokens)
-  - Install the token on your machine (Run `huggingface-cli login`).
-  - Get access to the gated model by [visiting the model card](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) and agreeing to the terms and conditions.
+    * Create a [user access token](https://huggingface.co/docs/hub/en/security-tokens)
+    * Install the token on your machine (Run `huggingface-cli login`).
+    * Get access to the gated model by [visiting the model card](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) and agreeing to the terms and conditions.
 
 ## Example 1: Running with a local file
 
@@ -105,7 +105,7 @@ To integrate with cloud blob storage, we recommend using presigned urls.
 
 * [Create an S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/creating-bucket.html).
 * The `awscli` package (Run `pip install awscli`) to configure your credentials and interactively use s3.
-  - [Configure your credentials](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html).
+    * [Configure your credentials](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html).
 * The `boto3` python package (Run `pip install boto3`) to generate presigned urls.
 
 ### Step 1: Upload your input script
diff --git a/examples/others/lmcache/README.md b/examples/others/lmcache/README.md
index 95a6bf995b2fd..759be55d6f1c5 100644
--- a/examples/others/lmcache/README.md
+++ b/examples/others/lmcache/README.md
@@ -28,16 +28,20 @@ to run disaggregated prefill and benchmark the performance.
 ### Components
 
 #### Server Scripts
+
 - `disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh` - Launches individual vLLM servers for prefill/decode, and also launches the proxy server.
 - `disagg_prefill_lmcache_v1/disagg_proxy_server.py` - FastAPI proxy server that coordinates between prefiller and decoder
 - `disagg_prefill_lmcache_v1/disagg_example_nixl.sh` - Main script to run the example
 
 #### Configuration
+
 - `disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml` - Configuration for prefiller server
 - `disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml` - Configuration for decoder server
 
 #### Log Files
+
 The main script generates several log files:
+
 - `prefiller.log` - Logs from the prefill server
 - `decoder.log` - Logs from the decode server
 - `proxy.log` - Logs from the proxy server
diff --git a/examples/others/logging_configuration.md b/examples/others/logging_configuration.md
index 916ab5fd1c871..7c8bdd199a72d 100644
--- a/examples/others/logging_configuration.md
+++ b/examples/others/logging_configuration.md
@@ -8,11 +8,11 @@ of logging configurations that range from simple-and-inflexible to
 more-complex-and-more-flexible.
 
 - No vLLM logging (simple and inflexible)
-  - Set `VLLM_CONFIGURE_LOGGING=0` (leaving `VLLM_LOGGING_CONFIG_PATH` unset)
+    - Set `VLLM_CONFIGURE_LOGGING=0` (leaving `VLLM_LOGGING_CONFIG_PATH` unset)
 - vLLM's default logging configuration (simple and inflexible)
-  - Leave `VLLM_CONFIGURE_LOGGING` unset or set `VLLM_CONFIGURE_LOGGING=1`
+    - Leave `VLLM_CONFIGURE_LOGGING` unset or set `VLLM_CONFIGURE_LOGGING=1`
 - Fine-grained custom logging configuration (more complex, more flexible)
-  - Leave `VLLM_CONFIGURE_LOGGING` unset or set `VLLM_CONFIGURE_LOGGING=1` and
+    - Leave `VLLM_CONFIGURE_LOGGING` unset or set `VLLM_CONFIGURE_LOGGING=1` and
     set `VLLM_LOGGING_CONFIG_PATH=<path-to-logging-config.json>`
 
 ## Logging Configuration Environment Variables
diff --git a/pyproject.toml b/pyproject.toml
index a65267942d47e..dfad5d2cdf319 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -156,16 +156,6 @@ markers = [
     "optional: optional tests that are automatically skipped, include --optional to run them",
 ]
 
-[tool.pymarkdown]
-plugins.md004.style = "sublist" # ul-style
-plugins.md007.indent = 4 # ul-indent
-plugins.md007.start_indented = true # ul-indent
-plugins.md013.enabled = false # line-length
-plugins.md041.enabled = false # first-line-h1
-plugins.md033.enabled = false # inline-html
-plugins.md046.enabled = false # code-block-style
-plugins.md024.allow_different_nesting = true # no-duplicate-headers
-
 [tool.ty.src]
 root = "./vllm"
 respect-ignore-files = true
diff --git a/tools/ep_kernels/README.md b/tools/ep_kernels/README.md
index f1479146f053c..273e0f378e343 100644
--- a/tools/ep_kernels/README.md
+++ b/tools/ep_kernels/README.md
@@ -1,6 +1,9 @@
+# Expert parallel kernels
+
 Large-scale cluster-level expert parallel, as described in the [DeepSeek-V3 Technical Report](http://arxiv.org/abs/2412.19437), is an efficient way to deploy sparse MoE models with many experts. However, such deployment requires many components beyond a normal Python package, including system package support and system driver support. It is impossible to bundle all these components into a Python package.
 
 Here we break down the requirements in 2 steps:
+
 1. Build and install the Python libraries (both [pplx-kernels](https://github.com/ppl-ai/pplx-kernels) and [DeepEP](https://github.com/deepseek-ai/DeepEP)), including necessary dependencies like NVSHMEM. This step does not require any privileged access. Any user can do this.
 2. Configure NVIDIA driver to enable IBGDA. This step requires root access, and must be done on the host machine.
 
@@ -8,15 +11,15 @@ Here we break down the requirements in 2 steps:
 
 All scripts accept a positional argument as workspace path for staging the build, defaulting to `$(pwd)/ep_kernels_workspace`.
 
-# Usage
+## Usage
 
-## Single-node
+### Single-node
 
 ```bash
 bash install_python_libraries.sh
 ```
 
-## Multi-node
+### Multi-node
 
 ```bash
 bash install_python_libraries.sh
diff --git a/vllm/plugins/lora_resolvers/README.md b/vllm/plugins/lora_resolvers/README.md
index 7e7c55f5c69c7..48f27dddea07e 100644
--- a/vllm/plugins/lora_resolvers/README.md
+++ b/vllm/plugins/lora_resolvers/README.md
@@ -6,7 +6,8 @@ via the LoRAResolver plugin framework.
 Note that `VLLM_ALLOW_RUNTIME_LORA_UPDATING` must be set to true to allow LoRA resolver plugins
 to work, and `VLLM_PLUGINS` must be set to include the desired resolver plugins.
 
-# lora_filesystem_resolver
+## lora_filesystem_resolver
+
 This LoRA Resolver is installed with vLLM by default.
 To use, set `VLLM_PLUGIN_LORA_CACHE_DIR` to a local directory. When vLLM receives a request
 for a LoRA adapter `foobar` it doesn't currently recognize, it will look in that local directory

From 76080cff79b5b56e3d8b6a2fb9b9c5b4c4633c67 Mon Sep 17 00:00:00 2001
From: Chen Zhang <zhangch99@outlook.com>
Date: Tue, 29 Jul 2025 19:45:18 -0700
Subject: [PATCH 032/224] [DOC] Fix path of v1 related figures (#21868)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 .../design/{v1 => }/metrics/intervals-1.png     | Bin
 .../design/{v1 => }/metrics/intervals-2.png     | Bin
 .../design/{v1 => }/metrics/intervals-3.png     | Bin
 .../{v1 => }/prefix_caching/example-time-1.png  | Bin
 .../{v1 => }/prefix_caching/example-time-3.png  | Bin
 .../{v1 => }/prefix_caching/example-time-4.png  | Bin
 .../{v1 => }/prefix_caching/example-time-5.png  | Bin
 .../{v1 => }/prefix_caching/example-time-6.png  | Bin
 .../{v1 => }/prefix_caching/example-time-7.png  | Bin
 .../design/{v1 => }/prefix_caching/free.png     | Bin
 .../design/{v1 => }/prefix_caching/overview.png | Bin
 .../design/{v1 => }/tpu/most_model_len.png      | Bin
 docs/configuration/tpu.md                       |   2 +-
 docs/design/metrics.md                          |   6 +++---
 docs/design/prefix_caching.md                   |  16 ++++++++--------
 15 files changed, 12 insertions(+), 12 deletions(-)
 rename docs/assets/design/{v1 => }/metrics/intervals-1.png (100%)
 rename docs/assets/design/{v1 => }/metrics/intervals-2.png (100%)
 rename docs/assets/design/{v1 => }/metrics/intervals-3.png (100%)
 rename docs/assets/design/{v1 => }/prefix_caching/example-time-1.png (100%)
 rename docs/assets/design/{v1 => }/prefix_caching/example-time-3.png (100%)
 rename docs/assets/design/{v1 => }/prefix_caching/example-time-4.png (100%)
 rename docs/assets/design/{v1 => }/prefix_caching/example-time-5.png (100%)
 rename docs/assets/design/{v1 => }/prefix_caching/example-time-6.png (100%)
 rename docs/assets/design/{v1 => }/prefix_caching/example-time-7.png (100%)
 rename docs/assets/design/{v1 => }/prefix_caching/free.png (100%)
 rename docs/assets/design/{v1 => }/prefix_caching/overview.png (100%)
 rename docs/assets/design/{v1 => }/tpu/most_model_len.png (100%)

diff --git a/docs/assets/design/v1/metrics/intervals-1.png b/docs/assets/design/metrics/intervals-1.png
similarity index 100%
rename from docs/assets/design/v1/metrics/intervals-1.png
rename to docs/assets/design/metrics/intervals-1.png
diff --git a/docs/assets/design/v1/metrics/intervals-2.png b/docs/assets/design/metrics/intervals-2.png
similarity index 100%
rename from docs/assets/design/v1/metrics/intervals-2.png
rename to docs/assets/design/metrics/intervals-2.png
diff --git a/docs/assets/design/v1/metrics/intervals-3.png b/docs/assets/design/metrics/intervals-3.png
similarity index 100%
rename from docs/assets/design/v1/metrics/intervals-3.png
rename to docs/assets/design/metrics/intervals-3.png
diff --git a/docs/assets/design/v1/prefix_caching/example-time-1.png b/docs/assets/design/prefix_caching/example-time-1.png
similarity index 100%
rename from docs/assets/design/v1/prefix_caching/example-time-1.png
rename to docs/assets/design/prefix_caching/example-time-1.png
diff --git a/docs/assets/design/v1/prefix_caching/example-time-3.png b/docs/assets/design/prefix_caching/example-time-3.png
similarity index 100%
rename from docs/assets/design/v1/prefix_caching/example-time-3.png
rename to docs/assets/design/prefix_caching/example-time-3.png
diff --git a/docs/assets/design/v1/prefix_caching/example-time-4.png b/docs/assets/design/prefix_caching/example-time-4.png
similarity index 100%
rename from docs/assets/design/v1/prefix_caching/example-time-4.png
rename to docs/assets/design/prefix_caching/example-time-4.png
diff --git a/docs/assets/design/v1/prefix_caching/example-time-5.png b/docs/assets/design/prefix_caching/example-time-5.png
similarity index 100%
rename from docs/assets/design/v1/prefix_caching/example-time-5.png
rename to docs/assets/design/prefix_caching/example-time-5.png
diff --git a/docs/assets/design/v1/prefix_caching/example-time-6.png b/docs/assets/design/prefix_caching/example-time-6.png
similarity index 100%
rename from docs/assets/design/v1/prefix_caching/example-time-6.png
rename to docs/assets/design/prefix_caching/example-time-6.png
diff --git a/docs/assets/design/v1/prefix_caching/example-time-7.png b/docs/assets/design/prefix_caching/example-time-7.png
similarity index 100%
rename from docs/assets/design/v1/prefix_caching/example-time-7.png
rename to docs/assets/design/prefix_caching/example-time-7.png
diff --git a/docs/assets/design/v1/prefix_caching/free.png b/docs/assets/design/prefix_caching/free.png
similarity index 100%
rename from docs/assets/design/v1/prefix_caching/free.png
rename to docs/assets/design/prefix_caching/free.png
diff --git a/docs/assets/design/v1/prefix_caching/overview.png b/docs/assets/design/prefix_caching/overview.png
similarity index 100%
rename from docs/assets/design/v1/prefix_caching/overview.png
rename to docs/assets/design/prefix_caching/overview.png
diff --git a/docs/assets/design/v1/tpu/most_model_len.png b/docs/assets/design/tpu/most_model_len.png
similarity index 100%
rename from docs/assets/design/v1/tpu/most_model_len.png
rename to docs/assets/design/tpu/most_model_len.png
diff --git a/docs/configuration/tpu.md b/docs/configuration/tpu.md
index 0ff0cdda380e9..a2941c80bd27c 100644
--- a/docs/configuration/tpu.md
+++ b/docs/configuration/tpu.md
@@ -47,7 +47,7 @@ This initial compilation time ranges significantly and is impacted by many of th
 
 #### max model len vs. most model len
 
-![most_model_len](../assets/design/v1/tpu/most_model_len.png)
+![most_model_len](../assets/design/tpu/most_model_len.png)
 
 If most of your requests are shorter than the maximum model length but you still need to accommodate occasional longer requests, setting a high maximum model length can negatively impact performance. In these cases, you can try introducing most model len by specifying the `VLLM_TPU_MOST_MODEL_LEN` environment variable.
 
diff --git a/docs/design/metrics.md b/docs/design/metrics.md
index ba34c7dca0017..1f65331d3c0a9 100644
--- a/docs/design/metrics.md
+++ b/docs/design/metrics.md
@@ -223,7 +223,7 @@ And the calculated intervals are:
 
 Put another way:
 
-![Interval calculations - common case](../../assets/design/v1/metrics/intervals-1.png)
+![Interval calculations - common case](../assets/design/metrics/intervals-1.png)
 
 We explored the possibility of having the frontend calculate these
 intervals using the timing of events visible by the frontend. However,
@@ -238,13 +238,13 @@ When a preemption occurs during decode, since any already generated
 tokens are reused, we consider the preemption as affecting the
 inter-token, decode, and inference intervals.
 
-![Interval calculations - preempted decode](../../assets/design/v1/metrics/intervals-2.png)
+![Interval calculations - preempted decode](../assets/design/metrics/intervals-2.png)
 
 When a preemption occurs during prefill (assuming such an event
 is possible), we consider the preemption as affecting the
 time-to-first-token and prefill intervals.
 
-![Interval calculations - preempted prefill](../../assets/design/v1/metrics/intervals-3.png)
+![Interval calculations - preempted prefill](../assets/design/metrics/intervals-3.png)
 
 ### Frontend Stats Collection
 
diff --git a/docs/design/prefix_caching.md b/docs/design/prefix_caching.md
index fcc014cf85164..9941837bf1652 100644
--- a/docs/design/prefix_caching.md
+++ b/docs/design/prefix_caching.md
@@ -125,7 +125,7 @@ There are two design points to highlight:
 
 As a result, we will have the following components when the KV cache manager is initialized:
 
-![Component Overview](../../assets/design/v1/prefix_caching/overview.png)
+![Component Overview](../assets/design/prefix_caching/overview.png)
 
 * Block Pool: A list of KVCacheBlock.  
 * Free Block Queue: Only store the pointers of head and tail blocks for manipulations.  
@@ -195,7 +195,7 @@ As can be seen, block 3 is a new full block and is cached. However, it is redund
 
 When a request is finished, we free all its blocks if no other requests are using them (reference count = 0). In this example, we free request 1 and block 2, 3, 4, 8 associated with it. We can see that the freed blocks are added to the tail of the free queue in the *reverse* order. This is because the last block of a request must hash more tokens and is less likely to be reused by other requests. As a result, it should be evicted first.
 
-![Free queue after a request us freed](../../assets/design/v1/prefix_caching/free.png)
+![Free queue after a request is freed](../assets/design/prefix_caching/free.png)
 
 ### Eviction (LRU)
 
@@ -211,24 +211,24 @@ In this example, we assume the block size is 4 (each block can cache 4 tokens),
 
 **Time 1: The cache is empty and a new request comes in.** We allocate 4 blocks. 3 of them are already full and cached. The fourth block is partially full with 3 of 4 tokens.
 
-![Example Time 1](../../assets/design/v1/prefix_caching/example-time-1.png)
+![Example Time 1](../assets/design/prefix_caching/example-time-1.png)
 
 **Time 3: Request 0 makes the block 3 full and asks for a new block to keep decoding.** We cache block 3 and allocate block 4.
 
-![Example Time 3](../../assets/design/v1/prefix_caching/example-time-3.png)
+![Example Time 3](../assets/design/prefix_caching/example-time-3.png)
 
 **Time 4: Request 1 comes in with the 14 prompt tokens, where the first 10 tokens are the same as request 0.** We can see that only the first 2 blocks (8 tokens) hit the cache, because the 3rd block only matches 2 of 4 tokens.
 
-![Example Time 4](../../assets/design/v1/prefix_caching/example-time-4.png)
+![Example Time 4](../assets/design/prefix_caching/example-time-4.png)
 
 **Time 5: Request 0 is finished and free.** Blocks 2, 3 and 4 are added to the free queue in the reverse order (but block 2 and 3 are still cached). Block 0 and 1 are not added to the free queue because they are being used by Request 1.
 
-![Example Time 5](../../assets/design/v1/prefix_caching/example-time-5.png)
+![Example Time 5](../assets/design/prefix_caching/example-time-5.png)
 
 **Time 6: Request 1 is finished and free.**
 
-![Example Time 6](../../assets/design/v1/prefix_caching/example-time-6.png)
+![Example Time 6](../assets/design/prefix_caching/example-time-6.png)
 
 **Time 7: Request 2 comes in with the 29 prompt tokens, where the first 12 tokens are the same as request 0\.** Note that even the block order in the free queue was `7 - 8 - 9 - 4 - 3 - 2 - 6 - 5 - 1 - 0`, the cache hit blocks (i.e., 0, 1, 2) are touched and removed from the queue before allocation, so the free queue becomes `7 - 8 - 9 - 4 - 3 - 6 - 5`. As a result, the allocated blocks are 0 (cached), 1 (cached), 2 (cached), 7, 8, 9, 4, 3 (evicted).
 
-![Example Time 7](../../assets/design/v1/prefix_caching/example-time-7.png)
+![Example Time 7](../assets/design/prefix_caching/example-time-7.png)

From fb58e3a651f7321eb882ff28018a918b31726c82 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 29 Jul 2025 22:45:41 -0400
Subject: [PATCH 033/224] [Docs] Update docker.md with HF_TOKEN, new model, and
 podman fix (#21856)

---
 docs/deployment/docker.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md
index e500751896b34..5f6cfcb00a37a 100644
--- a/docs/deployment/docker.md
+++ b/docs/deployment/docker.md
@@ -10,23 +10,23 @@ The image can be used to run OpenAI compatible server and is available on Docker
 ```bash
 docker run --runtime nvidia --gpus all \
     -v ~/.cache/huggingface:/root/.cache/huggingface \
-    --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
     -p 8000:8000 \
     --ipc=host \
     vllm/vllm-openai:latest \
-    --model mistralai/Mistral-7B-v0.1
+    --model Qwen/Qwen3-0.6B
 ```
 
 This image can also be used with other container engines such as [Podman](https://podman.io/).
 
 ```bash
-podman run --gpus all \
+podman run --device nvidia.com/gpu=all \
   -v ~/.cache/huggingface:/root/.cache/huggingface \
   --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
   -p 8000:8000 \
   --ipc=host \
-  vllm/vllm-openai:latest \
-  --model mistralai/Mistral-7B-v0.1
+  docker.io/vllm/vllm-openai:latest \
+  --model Qwen/Qwen3-0.6B
 ```
 
 You can add any other [engine-args](../configuration/engine_args.md) you need after the image tag (`vllm/vllm-openai:latest`).

From b917da442b820245f537602d752e7146e66dd37a Mon Sep 17 00:00:00 2001
From: Csrayz <33659823+Csrayz@users.noreply.github.com>
Date: Wed, 30 Jul 2025 10:46:31 +0800
Subject: [PATCH 034/224] Expose PyTorch profiler configuration to environment
 variables (#21803)

Signed-off-by: Csrayz <33659823+Csrayz@users.noreply.github.com>
---
 docs/contributing/profiling.md |  7 ++++++-
 vllm/envs.py                   | 29 +++++++++++++++++++++++++++++
 vllm/v1/worker/gpu_worker.py   | 15 +++++++++++++--
 vllm/v1/worker/xpu_worker.py   | 13 ++++++++++++-
 4 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/docs/contributing/profiling.md b/docs/contributing/profiling.md
index 7c18b464b576c..74627e9062167 100644
--- a/docs/contributing/profiling.md
+++ b/docs/contributing/profiling.md
@@ -5,7 +5,12 @@
 
 ## Profile with PyTorch Profiler
 
-We support tracing vLLM workers using the `torch.profiler` module. You can enable tracing by setting the `VLLM_TORCH_PROFILER_DIR` environment variable to the directory where you want to save the traces: `VLLM_TORCH_PROFILER_DIR=/mnt/traces/`
+We support tracing vLLM workers using the `torch.profiler` module. You can enable tracing by setting the `VLLM_TORCH_PROFILER_DIR` environment variable to the directory where you want to save the traces: `VLLM_TORCH_PROFILER_DIR=/mnt/traces/`. Additionally, you can control the profiling content by specifying the following environment variables:
+
+- `VLLM_TORCH_PROFILER_RECORD_SHAPES=1` to enable recording tensor shapes, off by default
+- `VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY=1` to enable memory profiling, off by default
+- `VLLM_TORCH_PROFILER_WITH_STACK=1` to enable recording stack information, on by default (set to `0` to disable)
+- `VLLM_TORCH_PROFILER_WITH_FLOPS=1` to enable recording FLOPs, off by default
 
 The OpenAI server also needs to be started with the `VLLM_TORCH_PROFILER_DIR` environment variable set.
 
diff --git a/vllm/envs.py b/vllm/envs.py
index 9b6d8c8be242a..50cb3b7d1b7aa 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -80,6 +80,10 @@ if TYPE_CHECKING:
     VLLM_PLUGINS: Optional[list[str]] = None
     VLLM_LORA_RESOLVER_CACHE_DIR: Optional[str] = None
     VLLM_TORCH_PROFILER_DIR: Optional[str] = None
+    VLLM_TORCH_PROFILER_RECORD_SHAPES: bool = False
+    VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY: bool = False
+    VLLM_TORCH_PROFILER_WITH_STACK: bool = True
+    VLLM_TORCH_PROFILER_WITH_FLOPS: bool = False
     VLLM_USE_TRITON_AWQ: bool = False
     VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False
     VLLM_SKIP_P2P_CHECK: bool = False
@@ -629,6 +633,31 @@ environment_variables: dict[str, Callable[[], Any]] = {
     lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
              .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
 
+    # Enable torch profiler to record shapes if set
+    # VLLM_TORCH_PROFILER_RECORD_SHAPES=1. If not set, torch profiler will
+    # not record shapes.
+    "VLLM_TORCH_PROFILER_RECORD_SHAPES":
+    lambda: bool(os.getenv("VLLM_TORCH_PROFILER_RECORD_SHAPES", "0") != "0"),
+
+    # Enable torch profiler to profile memory if set
+    # VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY=1. If not set, torch profiler
+    # will not profile memory.
+    "VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY":
+    lambda: bool(
+        os.getenv("VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY", "0") != "0"),
+
+    # Enable torch profiler to record stack information. This is on by
+    # default; set VLLM_TORCH_PROFILER_WITH_STACK=0 to disable it.
+    # Any value other than "0" keeps stack recording enabled.
+    "VLLM_TORCH_PROFILER_WITH_STACK":
+    lambda: bool(os.getenv("VLLM_TORCH_PROFILER_WITH_STACK", "1") != "0"),
+
+    # Enable torch profiler to profile flops if set
+    # VLLM_TORCH_PROFILER_WITH_FLOPS=1. If not set, torch profiler will
+    # not profile flops.
+    "VLLM_TORCH_PROFILER_WITH_FLOPS":
+    lambda: bool(os.getenv("VLLM_TORCH_PROFILER_WITH_FLOPS", "0") != "0"),
+
     # If set, vLLM will use Triton implementations of AWQ.
     "VLLM_USE_TRITON_AWQ":
     lambda: bool(int(os.getenv("VLLM_USE_TRITON_AWQ", "0"))),
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index d9d1f14f0554c..0f46ed223ab88 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -71,12 +71,23 @@ class Worker(WorkerBase):
             torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
             logger.info("Profiling enabled. Traces will be saved to: %s",
                         torch_profiler_trace_dir)
+            logger.debug(
+                "Profiler config: record_shapes=%s,"
+                "profile_memory=%s,with_stack=%s,with_flops=%s",
+                envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
+                envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
+                envs.VLLM_TORCH_PROFILER_WITH_STACK,
+                envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
+            )
             self.profiler = torch.profiler.profile(
                 activities=[
                     torch.profiler.ProfilerActivity.CPU,
                     torch.profiler.ProfilerActivity.CUDA,
                 ],
-                with_stack=True,
+                record_shapes=envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
+                profile_memory=envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
+                with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
+                with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
                 on_trace_ready=torch.profiler.tensorboard_trace_handler(
                     torch_profiler_trace_dir, use_gzip=True))
         else:
@@ -209,7 +220,7 @@ class Worker(WorkerBase):
 
     @torch.inference_mode()
     def determine_available_memory(self) -> int:
-        """Profiles the peak memory usage of the model to determine how much 
+        """Profiles the peak memory usage of the model to determine how much
         memory can be used for KV cache without OOMs.
 
         The engine will first conduct a profiling of the existing memory usage.
diff --git a/vllm/v1/worker/xpu_worker.py b/vllm/v1/worker/xpu_worker.py
index c7885694f7a38..2a7e0625b2f87 100644
--- a/vllm/v1/worker/xpu_worker.py
+++ b/vllm/v1/worker/xpu_worker.py
@@ -41,12 +41,23 @@ class XPUWorker(Worker):
             torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
             logger.info("Profiling enabled. Traces will be saved to: %s",
                         torch_profiler_trace_dir)
+            logger.debug(
+                "Profiler config: record_shapes=%s,"
+                "profile_memory=%s,with_stack=%s,with_flops=%s",
+                envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
+                envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
+                envs.VLLM_TORCH_PROFILER_WITH_STACK,
+                envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
+            )
             self.profiler = torch.profiler.profile(
                 activities=[
                     torch.profiler.ProfilerActivity.CPU,
                     torch.profiler.ProfilerActivity.XPU,
                 ],
-                with_stack=True,
+                record_shapes=envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
+                profile_memory=envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
+                with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
+                with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
                 on_trace_ready=torch.profiler.tensorboard_trace_handler(
                     torch_profiler_trace_dir, use_gzip=True))
         else:

From fdde18229ef32d5872596ce9b004dabf310edbde Mon Sep 17 00:00:00 2001
From: Areeb Syed <areebsyed237@gmail.com>
Date: Wed, 30 Jul 2025 09:05:21 +0530
Subject: [PATCH 035/224] [Bugfix] Fix shape mismatch assertion error when
 loading Gemma3n model with BitsAndBytes quantization (#21808)

Signed-off-by: sydarb <areebsyed237@gmail.com>
---
 vllm/model_executor/models/gemma3n.py | 31 +++++++++++++++++++++------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/vllm/model_executor/models/gemma3n.py b/vllm/model_executor/models/gemma3n.py
index 168665cc29655..d0880103d4e86 100644
--- a/vllm/model_executor/models/gemma3n.py
+++ b/vllm/model_executor/models/gemma3n.py
@@ -167,22 +167,33 @@ class Gemma3nAltUp(nn.Module):
 class Gemma3nLaurelBlock(nn.Module):
     """Learned Augmented Residual Layer"""
 
-    def __init__(self, hidden_size: int, laurel_rank: int, rms_norm_eps: float,
-                 prefix: str):
+    def __init__(
+        self,
+        hidden_size: int,
+        laurel_rank: int,
+        rms_norm_eps: float,
+        *,
+        quant_config: Optional[QuantizationConfig] = None,
+        prefix: str,
+    ) -> None:
         super().__init__()
 
         self.linear_left = ColumnParallelLinear(
             hidden_size,
             laurel_rank,
             bias=False,
+            quant_config=quant_config,
             prefix=f"{prefix}.linear_left",
             return_bias=False,
         )
-        self.linear_right = RowParallelLinear(laurel_rank,
-                                              hidden_size,
-                                              bias=False,
-                                              prefix=f"{prefix}.linear_right",
-                                              return_bias=False)
+        self.linear_right = RowParallelLinear(
+            laurel_rank,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.linear_right",
+            return_bias=False,
+        )
         self.post_laurel_norm = RMSNorm(
             hidden_size=hidden_size,
             eps=rms_norm_eps,
@@ -417,6 +428,7 @@ class Gemma3nDecoderLayer(nn.Module):
             hidden_size=config.hidden_size,
             laurel_rank=config.laurel_rank,
             rms_norm_eps=config.rms_norm_eps,
+            quant_config=quant_config,
             prefix=f"{prefix}.laurel",
         )
 
@@ -427,6 +439,7 @@ class Gemma3nDecoderLayer(nn.Module):
             config.hidden_size,
             config.hidden_size_per_layer_input,
             bias=False,
+            quant_config=quant_config,
             prefix=f"{prefix}.per_layer_input_gate",
             return_bias=False,
         )
@@ -434,6 +447,7 @@ class Gemma3nDecoderLayer(nn.Module):
             config.hidden_size_per_layer_input,
             config.hidden_size,
             bias=False,
+            quant_config=quant_config,
             prefix=f"{prefix}.per_layer_projection",
             return_bias=False,
         )
@@ -547,6 +561,7 @@ class Gemma3nTextModel(nn.Module):
             bias=False,
             gather_output=True,
             return_bias=False,
+            quant_config=quant_config,
             prefix=f"{prefix}.per_layer_model_projection",
         )
         self.per_layer_projection_norm = RMSNorm(
@@ -566,6 +581,7 @@ class Gemma3nTextModel(nn.Module):
                 bias=False,
                 gather_output=True,
                 return_bias=False,
+                quant_config=quant_config,
                 prefix=f"{prefix}.{idx-1}.altup_projections",
             ) for idx in range(1, self.config.altup_num_inputs)
         ])
@@ -576,6 +592,7 @@ class Gemma3nTextModel(nn.Module):
                 bias=False,
                 gather_output=True,
                 return_bias=False,
+                quant_config=quant_config,
                 prefix=f"{prefix}.{idx-1}.altup_unembed_projections",
             ) for idx in range(1, self.config.altup_num_inputs)
         ])

From b7b23da4d25add19411821fa5f784529d4de8732 Mon Sep 17 00:00:00 2001
From: MingzhenHan <hanmingzhen2002@outlook.com>
Date: Wed, 30 Jul 2025 11:35:33 +0800
Subject: [PATCH 036/224] [Bugfix] Fix comment typo of
 get_num_common_prefix_blocks() (#21827)

Signed-off-by: MingzhenHan <hanmingzhen2002@outlook.com>
---
 vllm/v1/core/kv_cache_coordinator.py         | 4 ++--
 vllm/v1/core/single_type_kv_cache_manager.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/core/kv_cache_coordinator.py b/vllm/v1/core/kv_cache_coordinator.py
index 0cce2ec81e08a..258805843e227 100644
--- a/vllm/v1/core/kv_cache_coordinator.py
+++ b/vllm/v1/core/kv_cache_coordinator.py
@@ -130,10 +130,10 @@ class KVCacheCoordinator(ABC):
 
         Args:
             request_id: The request ID.
-            block_hashes: The block hashes of the request.
+            num_running_requests: The number of requests in the RUNNING state.
 
         Returns:
-            The number of common prefix blocks.
+            list[int]: The number of common prefix blocks.
         """
         num_blocks_per_group = [
             manager.get_num_common_prefix_blocks(request_id,
diff --git a/vllm/v1/core/single_type_kv_cache_manager.py b/vllm/v1/core/single_type_kv_cache_manager.py
index e8a44c7773a71..714f49494c9a1 100644
--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -181,7 +181,7 @@ class SingleTypeKVCacheManager(ABC):
 
         Args:
             request_id: The request ID.
-            block_hashes: The block hashes of the request.
+            num_running_requests: The number of requests in the RUNNING state.
 
         Returns:
             The number of common prefix blocks.

From 44bc46da6008c04d351d8fd0bf026bff8ab57dab Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Wed, 30 Jul 2025 11:36:04 +0800
Subject: [PATCH 037/224] [Bugfix] Actually disable processing cache when API
 server is scaled out (#21839)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/entrypoints/cli/serve.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py
index a69363e3d98fe..7dcba2cccdb52 100644
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -140,11 +140,16 @@ def run_multi_api_server(args: argparse.Namespace):
     num_api_servers = args.api_server_count
     assert num_api_servers > 0
 
+    orig_disable_mm_preprocessor_cache = args.disable_mm_preprocessor_cache
+
     # set_process_title("ProcManager")
 
     if num_api_servers > 1:
         setup_multiprocess_prometheus()
 
+        # Not compatible with API server scale-out
+        args.disable_mm_preprocessor_cache = True
+
     listen_address, sock = setup_server(args)
 
     engine_args = vllm.AsyncEngineArgs.from_cli_args(args)
@@ -161,11 +166,9 @@ def run_multi_api_server(args: argparse.Namespace):
                              "with api_server_count > 1")
 
         if model_config.is_multimodal_model and not (
-                model_config.disable_mm_preprocessor_cache):
-            logger.warning(
-                "Multi-model preprocessor cache will be disabled for"
-                " api_server_count > 1")
-            model_config.disable_mm_preprocessor_cache = True
+                orig_disable_mm_preprocessor_cache):
+            logger.warning("Multi-modal preprocessor cache will be disabled "
+                           "for api_server_count > 1")
 
     executor_class = Executor.get_class(vllm_config)
     log_stats = not engine_args.disable_log_stats

From 1b0a15553420e5459d9a8512a3f1bd7d4117db08 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Tue, 29 Jul 2025 23:50:46 -0400
Subject: [PATCH 038/224] [Perf] Using `__nv_fp8_e4m3` instead of `c10::e4m3`
 for `per_token_group_quant` (#21867)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 csrc/quantization/fp8/per_token_group_quant.cu | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/csrc/quantization/fp8/per_token_group_quant.cu b/csrc/quantization/fp8/per_token_group_quant.cu
index 2609054f2072b..f5b40e35b6e5a 100644
--- a/csrc/quantization/fp8/per_token_group_quant.cu
+++ b/csrc/quantization/fp8/per_token_group_quant.cu
@@ -1,12 +1,10 @@
 #include <ATen/cuda/CUDAContext.h>
-#include <c10/util/Float8_e4m3fn.h>
 
 #include "../per_token_group_quant_8bit.h"
 
 #include <cmath>
 
-#include <cuda_fp16.h>
-#include <cuda_bf16.h>
+#include <cuda_fp8.h>
 
 #include <torch/all.h>
 
@@ -199,7 +197,7 @@ void per_token_group_quant_8bit(const torch::Tensor& input,
   VLLM_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "per_token_group_quant_8bit", ([&] {
         if (dst_type == at::ScalarType::Float8_e4m3fn) {
-          LAUNCH_KERNEL(scalar_t, c10::Float8_e4m3fn);
+          LAUNCH_KERNEL(scalar_t, __nv_fp8_e4m3);
         } else if (dst_type == at::ScalarType::Char) {
           LAUNCH_KERNEL(scalar_t, int8_t);
         }

From 65f311ce5906941840fb5e16e29e798e7d35cf65 Mon Sep 17 00:00:00 2001
From: "wang.yuqi" <noooop@126.com>
Date: Wed, 30 Jul 2025 11:56:03 +0800
Subject: [PATCH 039/224] [Frontend] Add LLM.reward specific to reward models
 (#21720)

Signed-off-by: wang.yuqi <noooop@126.com>
---
 docs/models/pooling_models.md                 | 81 ++++++++++++-------
 examples/offline_inference/basic/embed.py     |  3 +-
 examples/offline_inference/basic/reward.py    | 53 ++++++++++++
 tests/conftest.py                             |  4 +
 tests/models/language/pooling/test_reward.py  |  2 +-
 .../pooling/test_truncation_control.py        |  6 +-
 vllm/entrypoints/llm.py                       | 60 +++++++++++++-
 7 files changed, 174 insertions(+), 35 deletions(-)
 create mode 100644 examples/offline_inference/basic/reward.py

diff --git a/docs/models/pooling_models.md b/docs/models/pooling_models.md
index f1200103171e9..1fbbba7ace5e1 100644
--- a/docs/models/pooling_models.md
+++ b/docs/models/pooling_models.md
@@ -45,14 +45,14 @@ Each pooling model in vLLM supports one or more of these tasks according to
 [Pooler.get_supported_tasks][vllm.model_executor.layers.pooler.Pooler.get_supported_tasks],
 enabling the corresponding APIs:
 
-| Task       | APIs               |
-|------------|--------------------|
-| `encode`   | `encode`           |
-| `embed`    | `embed`, `score`\* |
-| `classify` | `classify`         |
-| `score`    | `score`            |
+| Task       | APIs                                 |
+|------------|--------------------------------------|
+| `encode`   | `LLM.reward(...)`                    |
+| `embed`    | `LLM.embed(...)`, `LLM.score(...)`\* |
+| `classify` | `LLM.classify(...)`                  |
+| `score`    | `LLM.score(...)`                     |
 
-\* The `score` API falls back to `embed` task if the model does not support `score` task.
+\* The `LLM.score(...)` API falls back to `embed` task if the model does not support `score` task.
 
 ### Pooler Configuration
 
@@ -66,11 +66,11 @@ you can override some of its attributes via the `--override-pooler-config` optio
 If the model has been converted via `--convert` (see above),
 the pooler assigned to each task has the following attributes by default:
 
-| Task       | Pooling Type   | Normalization | Softmax |
-|------------|----------------|---------------|---------|
-| `encode`   | `ALL`          | ❌            | ❌      |
-| `embed`    | `LAST`         | ✅︎            | ❌      |
-| `classify` | `LAST`         | ❌            | ✅︎      |
+| Task       | Pooling Type | Normalization | Softmax |
+|------------|--------------|---------------|---------|
+| `reward`   | `ALL`        | ❌            | ❌      |
+| `embed`    | `LAST`       | ✅︎            | ❌      |
+| `classify` | `LAST`       | ❌            | ✅︎      |
 
 When loading [Sentence Transformers](https://huggingface.co/sentence-transformers) models,
 its Sentence Transformers configuration file (`modules.json`) takes priority over the model's defaults.
@@ -83,21 +83,6 @@ which takes priority over both the model's and Sentence Transformers's defaults.
 The [LLM][vllm.LLM] class provides various methods for offline inference.
 See [configuration][configuration] for a list of options when initializing the model.
 
-### `LLM.encode`
-
-The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
-It returns the extracted hidden states directly, which is useful for reward models.
-
-```python
-from vllm import LLM
-
-llm = LLM(model="Qwen/Qwen2.5-Math-RM-72B", runner="pooling")
-(output,) = llm.encode("Hello, my name is")
-
-data = output.outputs.data
-print(f"Data: {data!r}")
-```
-
 ### `LLM.embed`
 
 The [embed][vllm.LLM.embed] method outputs an embedding vector for each prompt.
@@ -106,7 +91,7 @@ It is primarily designed for embedding models.
 ```python
 from vllm import LLM
 
-llm = LLM(model="intfloat/e5-mistral-7b-instruct", runner="pooling")
+llm = LLM(model="intfloat/e5-small", runner="pooling")
 (output,) = llm.embed("Hello, my name is")
 
 embeds = output.outputs.embedding
@@ -154,6 +139,46 @@ print(f"Score: {score}")
 
 A code example can be found here: <gh-file:examples/offline_inference/basic/score.py>
 
+### `LLM.reward`
+
+The [reward][vllm.LLM.reward] method is available to all reward models in vLLM.
+It returns the extracted hidden states directly.
+
+```python
+from vllm import LLM
+
+llm = LLM(model="internlm/internlm2-1_8b-reward", runner="pooling", trust_remote_code=True)
+(output,) = llm.reward("Hello, my name is")
+
+data = output.outputs.data
+print(f"Data: {data!r}")
+```
+
+A code example can be found here: <gh-file:examples/offline_inference/basic/reward.py>
+
+### `LLM.encode`
+
+The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
+It returns the extracted hidden states directly.
+
+!!! note
+    Please use one of the more specific methods or set the task directly when using `LLM.encode`:
+
+    - For embeddings, use `LLM.embed(...)` or `pooling_task="embed"`.
+    - For classification logits, use `LLM.classify(...)` or `pooling_task="classify"`.
+    - For rewards, use `LLM.reward(...)` or `pooling_task="reward"`.
+    - For similarity scores, use `LLM.score(...)`.
+
+```python
+from vllm import LLM
+
+llm = LLM(model="intfloat/e5-small", runner="pooling")
+(output,) = llm.encode("Hello, my name is", pooling_task="embed")
+
+data = output.outputs.data
+print(f"Data: {data!r}")
+```
+
 ## Online Serving
 
 Our [OpenAI-Compatible Server](../serving/openai_compatible_server.md) provides endpoints that correspond to the offline APIs:
diff --git a/examples/offline_inference/basic/embed.py b/examples/offline_inference/basic/embed.py
index 526753bcef22f..158836728beed 100644
--- a/examples/offline_inference/basic/embed.py
+++ b/examples/offline_inference/basic/embed.py
@@ -12,10 +12,9 @@ def parse_args():
     parser = EngineArgs.add_cli_args(parser)
     # Set example specific arguments
     parser.set_defaults(
-        model="intfloat/e5-mistral-7b-instruct",
+        model="intfloat/e5-small",
         runner="pooling",
         enforce_eager=True,
-        max_model_len=1024,
     )
     return parser.parse_args()
 
diff --git a/examples/offline_inference/basic/reward.py b/examples/offline_inference/basic/reward.py
new file mode 100644
index 0000000000000..aa173cf96f5bc
--- /dev/null
+++ b/examples/offline_inference/basic/reward.py
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from argparse import Namespace
+
+from vllm import LLM, EngineArgs
+from vllm.utils import FlexibleArgumentParser
+
+
+def parse_args():
+    parser = FlexibleArgumentParser()
+    parser = EngineArgs.add_cli_args(parser)
+    # Set example specific arguments
+    parser.set_defaults(
+        model="internlm/internlm2-1_8b-reward",
+        runner="pooling",
+        enforce_eager=True,
+        max_model_len=1024,
+        trust_remote_code=True,
+    )
+    return parser.parse_args()
+
+
+def main(args: Namespace):
+    # Sample prompts.
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+
+    # Create an LLM.
+    # You should pass runner="pooling" for reward models
+    llm = LLM(**vars(args))
+
+    # Generate rewards. The output is a list of PoolingRequestOutput.
+    outputs = llm.reward(prompts)
+
+    # Print the outputs.
+    print("\nGenerated Outputs:\n" + "-" * 60)
+    for prompt, output in zip(prompts, outputs):
+        rewards = output.outputs.data
+        rewards_trimmed = (
+            (str(rewards[:16])[:-1] + ", ...]") if len(rewards) > 16 else rewards
+        )
+        print(f"Prompt: {prompt!r} \nReward: {rewards_trimmed} (size={len(rewards)})")
+        print("-" * 60)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/tests/conftest.py b/tests/conftest.py
index e4df6ebf2c260..67f0e7424038c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1053,6 +1053,10 @@ class VllmRunner:
         req_outputs = self.llm.encode(prompts)
         return [req_output.outputs.data for req_output in req_outputs]
 
+    def reward(self, prompts: list[str]) -> list[list[float]]:
+        req_outputs = self.llm.reward(prompts)
+        return [req_output.outputs.data for req_output in req_outputs]
+
     def score(
         self,
         text_1: Union[str, list[str]],
diff --git a/tests/models/language/pooling/test_reward.py b/tests/models/language/pooling/test_reward.py
index 3b7fab3ba5c99..a5f7dca76d822 100644
--- a/tests/models/language/pooling/test_reward.py
+++ b/tests/models/language/pooling/test_reward.py
@@ -95,7 +95,7 @@ def test_prm_models(
         monkeypatch.setenv("VLLM_USE_TRITON_FLASH_ATTN", "False")
 
     with vllm_runner(model, max_model_len=1024, dtype=dtype) as vllm_model:
-        vllm_outputs = vllm_model.encode(math_step_prompts)
+        vllm_outputs = vllm_model.reward(math_step_prompts)
 
     with hf_runner(model, dtype=dtype, auto_cls=AutoModel) as hf_model:
         hf_model = step_reward_patch_hf_model(hf_model)
diff --git a/tests/models/language/pooling/test_truncation_control.py b/tests/models/language/pooling/test_truncation_control.py
index dc2bf21ef63bc..c6ef899958a07 100644
--- a/tests/models/language/pooling/test_truncation_control.py
+++ b/tests/models/language/pooling/test_truncation_control.py
@@ -28,7 +28,7 @@ def test_smaller_truncation_size(vllm_runner,
 
     with vllm_runner(model_name, runner="pooling",
                      max_model_len=max_model_len) as vllm_model:
-        vllm_output = vllm_model.llm.encode(
+        vllm_output = vllm_model.llm.embed(
             input_str, truncate_prompt_tokens=truncate_prompt_tokens)
 
     prompt_tokens = vllm_output[0].prompt_token_ids
@@ -43,7 +43,7 @@ def test_max_truncation_size(vllm_runner,
 
     with vllm_runner(model_name, runner="pooling",
                      max_model_len=max_model_len) as vllm_model:
-        vllm_output = vllm_model.llm.encode(
+        vllm_output = vllm_model.llm.embed(
             input_str, truncate_prompt_tokens=truncate_prompt_tokens)
 
     prompt_tokens = vllm_output[0].prompt_token_ids
@@ -61,7 +61,7 @@ def test_bigger_truncation_size(vllm_runner,
             model_name, runner="pooling",
             max_model_len=max_model_len) as vllm_model:
 
-        llm_output = vllm_model.llm.encode(
+        llm_output = vllm_model.llm.embed(
             input_str, truncate_prompt_tokens=truncate_prompt_tokens)
 
         assert llm_output == f"""truncate_prompt_tokens value 
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index adef350931f3d..842a22ccebaa4 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1037,7 +1037,7 @@ class LLM:
         truncate_prompt_tokens: Optional[int] = None,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        pooling_task: PoolingTask = "encode",
+        pooling_task: Optional[PoolingTask] = None,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
     ) -> list[PoolingRequestOutput]:
         """Apply pooling to the hidden states corresponding to the input
@@ -1069,6 +1069,25 @@ class LLM:
             considered legacy and may be deprecated in the future. You should
             instead pass them via the `inputs` parameter.
         """
+        if pooling_task is None:
+            if "embed" in self.supported_tasks:
+                pooling_task = "embed"
+            else:
+                pooling_task = "encode"
+
+            logger.warning_once(
+                "`LLM.encode` is currently using `pooling_task = %s`.\n"
+                "Please use one of the more specific methods or set the "
+                "task directly when using `LLM.encode`:\n"
+                "  - For embeddings, use `LLM.embed(...)` "
+                "or `pooling_task=\"embed\"`.\n"
+                "  - For classification logits, use `LLM.classify(...)` "
+                "or `pooling_task=\"classify\"`.\n"
+                "  - For rewards, use `LLM.reward(...)` "
+                "or `pooling_task=\"reward\"`\n"
+                "  - For similarity scores, use `LLM.score(...)`.",
+                pooling_task)
+
         model_config = self.llm_engine.model_config
         runner_type = model_config.runner_type
         if runner_type != "pooling":
@@ -1207,6 +1226,45 @@ class LLM:
 
         return [ClassificationRequestOutput.from_base(item) for item in items]
 
+    def reward(
+        self,
+        prompts: Union[PromptType, Sequence[PromptType]],
+        /,
+        *,
+        truncate_prompt_tokens: Optional[int] = None,
+        use_tqdm: Union[bool, Callable[..., tqdm]] = True,
+        pooling_params: Optional[Union[PoolingParams,
+                                       Sequence[PoolingParams]]] = None,
+        lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
+    ) -> list[PoolingRequestOutput]:
+        """
+        Generate rewards for each prompt.
+
+        Args:
+            prompts: The prompts to the LLM. You may pass a sequence of prompts
+                for batch inference. See [PromptType][vllm.inputs.PromptType]
+                for more details about the format of each prompt.
+            use_tqdm: If `True`, shows a tqdm progress bar.
+                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
+                it is used to create the progress bar.
+                If `False`, no progress bar is created.
+            lora_request: LoRA request to use for generation, if any.
+            pooling_params: The pooling parameters for pooling. If None, we
+                use the default pooling parameters.
+        Returns:
+            A list of `PoolingRequestOutput` objects containing the
+            pooled hidden states in the same order as the input prompts.
+        """
+
+        return self.encode(
+            prompts,
+            use_tqdm=use_tqdm,
+            lora_request=lora_request,
+            pooling_params=pooling_params,
+            truncate_prompt_tokens=truncate_prompt_tokens,
+            pooling_task="encode",
+        )
+
     def _embedding_score(
         self,
         tokenizer: AnyTokenizer,

From 05cbbe20c55d957f18c12f7eb11cf551504e657d Mon Sep 17 00:00:00 2001
From: Kunshang Ji <kunshang.ji@intel.com>
Date: Wed, 30 Jul 2025 11:56:14 +0800
Subject: [PATCH 040/224] [XPU] use `ZE_AFFINITY_MASK` for device select on xpu
 (#21815)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
---
 vllm/platforms/xpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 1d0bb3654929b..d8a663f2f0c4a 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -30,7 +30,7 @@ class XPUPlatform(Platform):
     # see https://github.com/ray-project/ray/blob/6a5eb5865eeb9ccf058a79b44f107e327e360673/python/ray/_private/accelerators/intel_gpu.py#L20 # noqa: E501
     ray_device_key: str = "GPU"
     dist_backend: str = "ccl"  # ccl | xccl
-    device_control_env_var: str = "ONEAPI_DEVICE_SELECTOR"
+    device_control_env_var: str = "ZE_AFFINITY_MASK"
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,

From e3bc17ceead9af80851b61a65010613eb34511e8 Mon Sep 17 00:00:00 2001
From: Tao He <linzhu.ht@alibaba-inc.com>
Date: Wed, 30 Jul 2025 12:30:44 +0800
Subject: [PATCH 041/224] Add @sighingnow as maintainer of qwen's related
 files. (#21895)

Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com>
---
 .github/CODEOWNERS | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index a3b2713430eb5..fb9f44353cec8 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -61,3 +61,7 @@ mkdocs.yaml @hmellor
 /vllm/v1/worker/^xpu @jikunshang
 /vllm/platforms/xpu.py @jikunshang
 /docker/Dockerfile.xpu @jikunshang
+
+# Qwen-specific files
+/vllm/attention/backends/dual_chunk_flash_attn.py @sighingnow
+/vllm/model_executor/models/qwen* @sighingnow

From 16f32505275687c01823b87134ce2d93f89407ad Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Wed, 30 Jul 2025 12:53:08 +0800
Subject: [PATCH 042/224] [CI/Build] Fix pre-commit failure in docs (#21897)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 docs/design/fused_moe_modular_kernel.md | 63 +++++++++++++++++--------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/docs/design/fused_moe_modular_kernel.md b/docs/design/fused_moe_modular_kernel.md
index 0943454d64292..3ef1232051b07 100644
--- a/docs/design/fused_moe_modular_kernel.md
+++ b/docs/design/fused_moe_modular_kernel.md
@@ -1,6 +1,7 @@
 # Fused MoE Modular Kernel
 
 ## Introduction
+
 FusedMoEModularKernel is implemented [here](gh-file:/vllm/model_executor/layers/fused_moe/modular_kernel.py)
 
 Based on the format of the input activations, FusedMoE implementations are broadly classified into 2 types.
@@ -31,7 +32,8 @@ As can be seen from the diagrams, there are a lot of operations and there can be
 
 The rest of the document will focus on the Contiguous / Non-Batched case. Extrapolating to the Batched case should be straight-forward.
 
-## ModularKernel Components:
+## ModularKernel Components
+
 FusedMoEModularKernel splits the FusedMoE operation into 3 parts,
 
 1. TopKWeightAndReduce
@@ -39,6 +41,7 @@ FusedMoEModularKernel splits the FusedMoE operation into 3 parts,
 3. FusedMoEPermuteExpertsUnpermute
 
 ### TopKWeightAndReduce
+
 The TopK Weight Application and Reduction components happen right after the Unpermute operation and before the All2All Combine. Note that the `FusedMoEPermuteExpertsUnpermute` is responsible for the Unpermute and `FusedMoEPrepareAndFinalize` is responsible for the All2All Combine. There is value in doing the TopK Weight Application and Reduction in the `FusedMoEPermuteExpertsUnpermute`. But some implementations choose to do it `FusedMoEPrepareAndFinalize`. In order to enable this flexibility, we have a TopKWeightAndReduce abstract class.
 
 Please find the implementations of TopKWeightAndReduce [here](gh-file:vllm/model_executor/layers/fused_moe/topk_weight_and_reduce.py).
@@ -50,12 +53,14 @@ The `FusedMoEModularKernel` acts as a bridge between the `FusedMoEPermuteExperts
 * `FusedMoEPermuteExpertsUnpermute::finalize_weight_and_reduce_impl` method returns `TopKWeightAndReduceContiguous` / `TopKWeightAndReduceNaiveBatched` / `TopKWeightAndReduceDelegate` if the `FusedMoEPermuteExpertsUnpermute` implementation needs the `FusedMoEPrepareAndFinalize::finalize()` to do the weight application and reduction.
 
 ### FusedMoEPrepareAndFinalize
+
 The `FusedMoEPrepareAndFinalize` abstract class exposes `prepare` and `finalize` functions.
 The `prepare` function is responsible for input activation Quantization and All2All Dispatch. The `finalize` function is responsible for invoking the All2All Combine. Additionally the `finalize` function may or may not do the TopK weight application and reduction (Please refer to the TopKWeightAndReduce section)
 
 ![](../assets/design/fused_moe_modular_kernel/prepare_and_finalize_blocks.png "FusedMoEPrepareAndFinalize Blocks")
 
 ### FusedMoEPermuteExpertsUnpermute
+
 The `FusedMoEPermuteExpertsUnpermute` class is where the crux of the MoE operations happen. The `FusedMoEPermuteExpertsUnpermute` abstract class exposes a few important functions,
 
 * apply()
@@ -63,6 +68,7 @@ The `FusedMoEPermuteExpertsUnpermute` class is where the crux of the MoE operati
 * finalize_weight_and_reduce_impl()
 
 #### apply()
+
 The `apply` method is where the implementations perform
 
 * Permute
@@ -74,50 +80,56 @@ The `apply` method is where the implementations perform
 * Maybe TopK Weight Application + Reduction
 
 #### workspace_shapes()
+
 The core FusedMoE implementation performs a series of operations. It would be inefficient to create output memory for each of these operations separately. To that effect, implementations are required to declare 2 workspace shapes, the workspace datatype and the FusedMoE output shape as outputs of the workspace_shapes() method. This information is used to allocate the workspace tensors and the output tensor in `FusedMoEModularKernel::forward()` and passed on to the `FusedMoEPermuteExpertsUnpermute::apply()` method. The workspaces could then be used as intermediate buffers in the FusedMoE implementation.
 
 #### finalize_weight_and_reduce_impl()
+
 It is sometimes efficient to perform TopK weight application and Reduction inside the `FusedMoEPermuteExpertsUnpermute::apply()`. Find an example [here](https://github.com/vllm-project/vllm/pull/20228). We have a `TopKWeightAndReduce` abstract class to facilitate such implementations. Please refer to the TopKWeightAndReduce section.
 `FusedMoEPermuteExpertsUnpermute::finalize_weight_and_reduce_impl()` returns the `TopKWeightAndReduce` object that the implementation wants the `FusedMoEPrepareAndFinalize::finalize()` to use.
 
 ![](../assets/design/fused_moe_modular_kernel/fused_experts_blocks.png "FusedMoEPermuteExpertsUnpermute Blocks")
 
 ### FusedMoEModularKernel
+
 `FusedMoEModularKernel` is composed of the `FusedMoEPrepareAndFinalize` and `FusedMoEPermuteExpertsUnpermute` objects.
 `FusedMoEModularKernel` pseudocode/sketch,
 
-```
-FusedMoEModularKernel::__init__(self,
-            prepare_finalize: FusedMoEPrepareAndFinalize,
-            fused_experts: FusedMoEPermuteExpertsUnpermute):
+```py
+class FusedMoEModularKernel:
+    def __init__(self,
+                 prepare_finalize: FusedMoEPrepareAndFinalize,
+                 fused_experts: FusedMoEPermuteExpertsUnpermute):
 
-    self.prepare_finalize = prepare_finalize
-    self.fused_experts = fused_experts
+        self.prepare_finalize = prepare_finalize
+        self.fused_experts = fused_experts
 
-FusedMoEModularKernel::forward(self, DP_A):
+    def forward(self, DP_A):
 
-    Aq, A_scale, _, _, _ = self.prepare_finalize.prepare(DP_A, ...)
+        Aq, A_scale, _, _, _ = self.prepare_finalize.prepare(DP_A, ...)
 
-    workspace13_shape, workspace2_shape, _, _ = self.fused_experts.workspace_shapes(...)
+        workspace13_shape, workspace2_shape, _, _ = self.fused_experts.workspace_shapes(...)
 
-    # allocate workspaces
-    workspace_13 = torch.empty(workspace13_shape, ...)
-    workspace_2 = torch.empty(workspace2_shape, ...)
+        # allocate workspaces
+        workspace_13 = torch.empty(workspace13_shape, ...)
+        workspace_2 = torch.empty(workspace2_shape, ...)
 
-    # execute fused_experts
-    fe_out = self.fused_experts.apply(Aq, A_scale, workspace13, workspace2, ...)
+        # execute fused_experts
+        fe_out = self.fused_experts.apply(Aq, A_scale, workspace_13, workspace_2, ...)
 
-    # war_impl is an object of type TopKWeightAndReduceNoOp if the fused_experts implementations performs the TopK Weight Application and Reduction.
-    war_impl = self.fused_experts.finalize_weight_and_reduce_impl()
+        # war_impl is an object of type TopKWeightAndReduceNoOp if the fused_experts implementation
+        # performs the TopK Weight Application and Reduction.
+        war_impl = self.fused_experts.finalize_weight_and_reduce_impl()
 
-    output = self.prepare_finalize.finalize(fe_out, war_impl,...)
-                            
-    return output
+        output = self.prepare_finalize.finalize(fe_out, war_impl, ...)
+
+        return output
 ```
 
 ## How-To
 
 ### How To Add a FusedMoEPrepareAndFinalize Type
+
 Typically a FusedMoEPrepareAndFinalize type is backed by an All2All Dispatch & Combine implementation / kernel. For example,
 
 * PplxPrepareAndFinalize type is backed by Pplx All2All kernels,
@@ -125,9 +137,11 @@ Typically a FusedMoEPrepareAndFinalize type is backed by an All2All Dispatch & C
 * DeepEPLLPrepareAndFinalize type is backed by DeepEP Low-Latency All2All kernels.
 
 #### Step 1: Add an All2All manager
+
 The purpose of the All2All Manager is to setup the All2All kernel implementations. The `FusedMoEPrepareAndFinalize` implementations typically fetch a kernel-implementation "handle" from the All2All Manager to invoke the Dispatch and Combine functions. Please look at the All2All Manager implementations [here](gh-file:vllm/distributed/device_communicators/all2all.py).
 
 #### Step 2: Add a FusedMoEPrepareAndFinalize Type
+
 This section describes the significance of the various functions exposed by the `FusedMoEPrepareAndFinalize` abstract class.
 
 `FusedMoEPrepareAndFinalize::prepare()`: The prepare method implements the Quantization and All2All Dispatch. Typically the Dispatch function from the relevant All2All Manager is invoked.
@@ -145,6 +159,7 @@ This section describes the significance of the various functions exposed by the
 We suggest picking an already existing `FusedMoEPrepareAndFinalize` implementation that matches your All2All implementation closely and using it as a reference.
 
 ### How To Add a FusedMoEPermuteExpertsUnpermute Type
+
 FusedMoEPermuteExpertsUnpermute performs the core of the FusedMoE operations. The various functions exposed by the abstract class and their significance is as follows,
 
 `FusedMoEPermuteExpertsUnpermute::activation_formats()`: Return the supported Input and Output activation formats. i.e. Contiguous / Batched format.
@@ -159,12 +174,14 @@ implementations that input `FusedMoEActivationFormat.Standard` support chunking
 `FusedMoEPermuteExpertsUnpermute::apply`: Refer to `FusedMoEPermuteExpertsUnpermute` section above.
 
 ### FusedMoEModularKernel Initialization
+
 `FusedMoEMethodBase` class has 2 methods that are collectively responsible in creating the `FusedMoEModularKernel` object. They are,
 
 * select_gemm_impl, and
 * init_prepare_finalize
 
 #### select_gemm_impl
+
 The `select_gemm_impl` method is undefined in the base class. It is the responsibility of the derived class to implement a method that constructs a valid/appropriate `FusedMoEPermuteExpertsUnpermute` object.
 Please refer to the implementations in,
 
@@ -176,12 +193,14 @@ Please refer to the implementations in,
 dervied classes.
 
 #### init_prepare_finalize
+
 Based on the input and env settings, the `init_prepare_finalize` method creates the appropriate `FusedMoEPrepareAndFinalize` object. The method then queries `select_gemm_impl` for the appropriate `FusedMoEPermuteExpertsUnpermute` object and builds the `FusedMoEModularKernel` object
 
 Please take a look at [init_prepare_finalize](https://github.com/vllm-project/vllm/blob/1cbf951ba272c230823b947631065b826409fa62/vllm/model_executor/layers/fused_moe/layer.py#L188).
 **Important**: The `FusedMoEMethodBase` derived classes use the `FusedMoEMethodBase::fused_experts` object in their `apply` methods. When settings permit the construction of a valid `FusedMoEModularKernel` object, we override `FusedMoEMethodBase::fused_experts` with it. This essentially makes the derived classes agnostic to what FusedMoE implementation is used.
 
 ### How To Unit Test
+
 We have `FusedMoEModularKernel` unit tests at [test_modular_kernel_combinations.py](gh-file:tests/kernels/moe/test_modular_kernel_combinations.py).
 
 The unit test iterates through all combinations of `FusedMoEPrepareAndFinalize` and `FusedMoEPremuteExpertsUnpermute` types and if they are
@@ -196,18 +215,21 @@ If you are adding some `FusedMoEPrepareAndFinalize` / `FusedMoEPermuteExpertsUnp
 Doing this will add the new implementation to the test suite.
 
 ### How To Check `FusedMoEPrepareAndFinalize` & `FusedMoEPermuteExpertsUnpermute` Compatibility
+
 The unit test file [test_modular_kernel_combinations.py](gh-file:tests/kernels/moe/test_modular_kernel_combinations.py) can also be executed as a standalone script.
 Example: `python3 -m tests.kernels.moe.test_modular_kernel_combinations --pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts`
 As a side-effect, this script can be used to test `FusedMoEPrepareAndFinalize` & `FusedMoEPermuteExpertsUnpermute` compatibility. When invoked
 with incompatible types, the script will error.
 
 ### How To Profile
+
 Please take a look at [profile_modular_kernel.py](gh-file:tests/kernels/moe/modular_kernel_tools/profile_modular_kernel.py)
 The script can be used to generate Torch traces for a single `FusedMoEModularKernel::forward()` call for any compatible
 `FusedMoEPrepareAndFinalize` and `FusedMoEPermuteExpertsUnpermute` types.
 Example: `python3 -m tests.kernels.moe.modular_kernel_tools.profile_modular_kernel --pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts`
 
 ## FusedMoEPrepareAndFinalize Implementations
+
 The following table lists the `FusedMoEPrepareAndFinalize` implementations at the time of writing,
 
 | Implementation | Type | Comments |
@@ -220,6 +242,7 @@ The following table lists the `FusedMoEPrepareAndFinalize` implementations at th
 | BatchedPrepareAndFinalize | Batched | A reference prepare/finalize class that reorganizes the tokens into expert batched format, i.e. E x max_num_tokens x K. (Doesn’t use any all2all kernels. This is primarily used in unit testing) |
 
 ## FusedMoEPermuteExpertsUnpermute
+
 The following table lists the `FusedMoEPermuteExpertsUnpermute` implementations at the time of writing,
 
 | Implementation | Type | Comment |

From 4cd7fe6ceaf5ad7d8ac2ba5597cd964c6db7e306 Mon Sep 17 00:00:00 2001
From: Ricardo Decal <crypdick@users.noreply.github.com>
Date: Tue, 29 Jul 2025 22:07:28 -0700
Subject: [PATCH 043/224] [Docs] Expand introduction to Ray in Multi-node
 deployment section (#21584)

Signed-off-by: Ricardo Decal <rdecal@anyscale.com>
---
 docs/serving/distributed_serving.md | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/serving/distributed_serving.md b/docs/serving/distributed_serving.md
index 9304976572788..08d889a00d2cf 100644
--- a/docs/serving/distributed_serving.md
+++ b/docs/serving/distributed_serving.md
@@ -58,7 +58,17 @@ vllm serve gpt2 \
 
 ## Multi-node deployment
 
-If a single node lacks sufficient GPUs to hold the model, deploy vLLM across multiple nodes. Multi-node deployments require Ray as the runtime engine. Ensure that every node provides an identical execution environment, including the model path and Python packages. Using container images is recommended because they provide a convenient way to keep environments consistent and to hide host heterogeneity.
+If a single node lacks sufficient GPUs to hold the model, deploy vLLM across multiple nodes. Ensure that every node provides an identical execution environment, including the model path and Python packages. Using container images is recommended because they provide a convenient way to keep environments consistent and to hide host heterogeneity.
+
+### What is Ray?
+
+Ray is a distributed computing framework for scaling Python programs. Multi-node vLLM deployments require Ray as the runtime engine.
+
+vLLM uses Ray to manage the distributed execution of tasks across multiple nodes and control where execution happens.
+
+Ray also offers high-level APIs for large-scale [offline batch inference](https://docs.ray.io/en/latest/data/working-with-llms.html) and [online serving](https://docs.ray.io/en/latest/serve/llm/serving-llms.html) that can leverage vLLM as the engine. These APIs add production-grade fault tolerance, scaling, and distributed observability to vLLM workloads.
+
+For details, see the [Ray documentation](https://docs.ray.io/en/latest/index.html).
 
 ### Ray cluster setup with containers
 

From 6f8d26188200385fa994526b10b5858a3da1ede7 Mon Sep 17 00:00:00 2001
From: Louie Tsai <louie.tsai@intel.com>
Date: Tue, 29 Jul 2025 22:57:03 -0700
Subject: [PATCH 044/224] Update vLLM Benchmark Suite for Xeon based on 0.9.2
 release  (#21486)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
---
 .../convert-results-json-to-markdown.py       |   1 +
 .../scripts/run-performance-benchmarks.sh     |   2 +-
 .../tests/serving-tests-cpu-snc2.json         | 209 +++++++++++++++++
 .../tests/serving-tests-cpu-snc3.json         | 211 ++++++++++++++++++
 .../tests/serving-tests-cpu.json              |  15 ++
 5 files changed, 437 insertions(+), 1 deletion(-)
 create mode 100644 .buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
 create mode 100644 .buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json

diff --git a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
index 05623879c0c2c..554256b4bdb8b 100644
--- a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
@@ -44,6 +44,7 @@ serving_column_mapping = {
     "test_name": "Test name",
     "gpu_type": "GPU",
     "completed": "# of req.",
+    "max_concurrency": "# of max concurrency.",
     "request_throughput": "Tput (req/s)",
     "total_token_throughput": "Total Token Tput (tok/s)",
     "output_throughput": "Output Tput (tok/s)",
diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
index b515ee43934d1..2c57666a81aa3 100644
--- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -33,7 +33,7 @@ check_gpus() {
 
 check_cpus() {
   # check the number of CPUs and NUMA Node and GPU type.
-  declare -g numa_count=$(python3 -c  "from numa import info;numa_size = info.get_num_configured_nodes(); print(numa_size)")
+  declare -g numa_count=$(lscpu | grep "NUMA node(s):" | awk '{print $3}')
   if [[ $numa_count -gt 0 ]]; then
     echo "NUMA found."
     echo $numa_count
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
new file mode 100644
index 0000000000000..a144b4420fbf1
--- /dev/null
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
@@ -0,0 +1,209 @@
+[
+    {
+        "test_name": "serving_llama8B_tp1_sharegpt",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "tensor_parallel_size": 1,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "sharegpt",
+            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+	    "max_concurrency": 60,
+            "num_prompts": 200
+        }
+    },
+    {
+        "test_name": "serving_llama8B_tp2_sharegpt",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "tensor_parallel_size": 2,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "sharegpt",
+            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+	    "max_concurrency": 60,
+            "num_prompts": 200
+        }
+    },
+    {
+        "test_name": "serving_llama8B_tp4_sharegpt",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "tensor_parallel_size": 4,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "sharegpt",
+            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+	    "max_concurrency": 60,
+            "num_prompts": 200
+        }
+    },
+    {
+        "test_name": "serving_llama8B_tp1_random_128_128",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "tensor_parallel_size": 1,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+	    "enable_chunked_prefill": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "random",
+	    "random-input-len": 128,
+	    "random-output-len": 128,
+	    "ignore-eos": "",
+	    "max_concurrency": 1000,
+            "num_prompts": 1000
+        }
+    },
+    {
+        "test_name": "serving_llama8B_tp2_random_128_128",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "tensor_parallel_size": 2,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+	    "enable_chunked_prefill": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "random",
+	    "random-input-len": 128,
+	    "random-output-len": 128,
+	    "ignore-eos": "",
+	    "max_concurrency": 1000,
+            "num_prompts": 1000
+        }
+    },
+    {
+        "test_name": "serving_llama8B_tp4_random_128_128",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "tensor_parallel_size": 4,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+	    "enable_chunked_prefill": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "random",
+	    "random-input-len": 128,
+	    "random-output-len": 128,
+	    "ignore-eos": "",
+	    "max_concurrency": 1000,
+            "num_prompts": 1000
+        }
+    }
+]
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
new file mode 100644
index 0000000000000..e6e69b63b74df
--- /dev/null
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
@@ -0,0 +1,211 @@
+[
+    {
+        "test_name": "serving_llama8B_pp1_sharegpt",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "pipeline_parallel_size": 1,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "sharegpt",
+            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+	    "max_concurrency": 60,
+            "num_prompts": 200
+        }
+    },
+    {
+        "test_name": "serving_llama8B_pp3_sharegpt",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "pipeline_parallel_size": 3,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "sharegpt",
+            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+	    "max_concurrency": 60,
+            "num_prompts": 200
+        }
+    },
+    {
+        "test_name": "serving_llama8B_tp2pp3_sharegpt",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "tensor_parallel_size": 2,
+            "pipeline_parallel_size": 3,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "sharegpt",
+            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+	    "max_concurrency": 60,
+            "num_prompts": 200
+        }
+    },
+    {
+        "test_name": "serving_llama8B_pp1_random_128_128",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "pipeline_parallel_size": 1,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+	    "enable_chunked_prefill": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "random",
+	    "random-input-len": 128,
+	    "random-output-len": 128,
+	    "ignore-eos": "",
+	    "max_concurrency": 1000,
+            "num_prompts": 1000
+        }
+    },
+    {
+        "test_name": "serving_llama8B_pp3_random_128_128",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "pipeline_parallel_size": 3,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+	    "enable_chunked_prefill": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "random",
+	    "random-input-len": 128,
+	    "random-output-len": 128,
+	    "ignore-eos": "",
+	    "max_concurrency": 1000,
+            "num_prompts": 1000
+        }
+    },
+    {
+        "test_name": "serving_llama8B_tp2pp3_random_128_128",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_environment_variables": {
+            "VLLM_RPC_TIMEOUT": 100000,
+	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
+	    "VLLM_CPU_KVCACHE_SPACE": 40
+        },
+        "server_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "tensor_parallel_size": 2,
+            "pipeline_parallel_size": 3,
+	    "dtype": "bfloat16",
+	    "distributed_executor_backend": "mp",
+	    "block_size": 128,
+	    "trust_remote_code": "",
+	    "enable_chunked_prefill": "",
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "random",
+	    "random-input-len": 128,
+	    "random-output-len": 128,
+	    "ignore-eos": "",
+	    "max_concurrency": 1000,
+            "num_prompts": 1000
+        }
+    }
+]
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
index 22f71c993ff33..ce1f924de387f 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
@@ -6,6 +6,7 @@
             "VLLM_RPC_TIMEOUT": 100000,
 	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
 	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
 	    "VLLM_CPU_KVCACHE_SPACE": 40
         },
         "server_parameters": {
@@ -18,6 +19,8 @@
             "disable_log_stats": "",
             "disable_log_requests": "",
 	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -36,6 +39,7 @@
             "VLLM_RPC_TIMEOUT": 100000,
 	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
 	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
 	    "VLLM_CPU_KVCACHE_SPACE": 40
         },
         "server_parameters": {
@@ -48,6 +52,8 @@
             "disable_log_stats": "",
             "disable_log_requests": "",
 	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -66,6 +72,7 @@
             "VLLM_RPC_TIMEOUT": 100000,
 	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
 	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
 	    "VLLM_CPU_KVCACHE_SPACE": 40
         },
         "server_parameters": {
@@ -78,6 +85,8 @@
             "disable_log_stats": "",
             "disable_log_requests": "",
 	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -96,6 +105,7 @@
             "VLLM_RPC_TIMEOUT": 100000,
 	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
 	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
 	    "VLLM_CPU_KVCACHE_SPACE": 40
         },
         "server_parameters": {
@@ -109,6 +119,8 @@
             "disable_log_stats": "",
             "disable_log_requests": "",
 	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -129,6 +141,7 @@
             "VLLM_RPC_TIMEOUT": 100000,
 	    "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
 	    "VLLM_ENGINE_ITERATION_TIMEOUT_S": 120,
+	    "VLLM_CPU_SGL_KERNEL": 1,
 	    "VLLM_CPU_KVCACHE_SPACE": 40
         },
         "server_parameters": {
@@ -142,6 +155,8 @@
             "disable_log_stats": "",
             "disable_log_requests": "",
 	    "enforce_eager": "",
+	    "max_num_batched_tokens": 2048,
+	    "max_num_seqs": 256,
             "load_format": "dummy"
         },
         "client_parameters": {

From 2ca5f82c2a8152ba67eaa033fbdb479d28f4cc3b Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Wed, 30 Jul 2025 14:54:18 +0800
Subject: [PATCH 045/224] [Misc] Remove redundant config definitions (#21891)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/model_executor/models/aimv2.py           |  22 +-
 vllm/model_executor/models/dbrx.py            |  14 +-
 vllm/model_executor/models/exaone.py          |   8 +-
 vllm/model_executor/models/exaone4.py         |   6 +-
 vllm/model_executor/models/keye.py            |   3 -
 vllm/model_executor/models/minimax_vl_01.py   |   7 +-
 vllm/model_executor/models/mpt.py             |   8 +-
 vllm/model_executor/models/ovis.py            |  13 +-
 vllm/transformers_utils/config.py             |  28 +-
 vllm/transformers_utils/configs/__init__.py   |  30 +-
 vllm/transformers_utils/configs/cohere2.py    | 195 ------------
 vllm/transformers_utils/configs/dbrx.py       | 280 ------------------
 vllm/transformers_utils/configs/exaone.py     | 190 ------------
 vllm/transformers_utils/configs/exaone4.py    | 252 ----------------
 .../configs/minimax_text_01.py                |  70 -----
 .../configs/minimax_vl_01.py                  |  71 -----
 vllm/transformers_utils/configs/mpt.py        | 180 -----------
 vllm/transformers_utils/configs/nvlm_d.py     |  31 --
 vllm/transformers_utils/configs/ovis.py       | 184 ------------
 vllm/transformers_utils/configs/skyworkr1v.py |  54 ----
 vllm/transformers_utils/configs/solar.py      | 247 ---------------
 vllm/transformers_utils/configs/telechat2.py  |  64 ----
 .../transformers_utils/processors/__init__.py |   7 +
 23 files changed, 54 insertions(+), 1910 deletions(-)
 delete mode 100644 vllm/transformers_utils/configs/cohere2.py
 delete mode 100644 vllm/transformers_utils/configs/dbrx.py
 delete mode 100644 vllm/transformers_utils/configs/exaone.py
 delete mode 100644 vllm/transformers_utils/configs/exaone4.py
 delete mode 100644 vllm/transformers_utils/configs/minimax_text_01.py
 delete mode 100644 vllm/transformers_utils/configs/minimax_vl_01.py
 delete mode 100644 vllm/transformers_utils/configs/mpt.py
 delete mode 100644 vllm/transformers_utils/configs/nvlm_d.py
 delete mode 100644 vllm/transformers_utils/configs/ovis.py
 delete mode 100644 vllm/transformers_utils/configs/skyworkr1v.py
 delete mode 100644 vllm/transformers_utils/configs/solar.py
 delete mode 100644 vllm/transformers_utils/configs/telechat2.py

diff --git a/vllm/model_executor/models/aimv2.py b/vllm/model_executor/models/aimv2.py
index b13d863ebb744..d2307bb464bdb 100644
--- a/vllm/model_executor/models/aimv2.py
+++ b/vllm/model_executor/models/aimv2.py
@@ -8,6 +8,7 @@ from typing import Optional
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention.layer import MultiHeadAttention
 from vllm.distributed import get_tensor_model_parallel_world_size
@@ -20,13 +21,12 @@ from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
-from vllm.transformers_utils.configs.ovis import AIMv2Config
 
 
 class AIMv2SwiGLUFFN(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         hidden_features = config.intermediate_size
         in_features = config.hidden_size
@@ -57,7 +57,7 @@ class AIMv2SwiGLUFFN(nn.Module):
 
 class AIMv2PatchEmbed(nn.Module):
 
-    def __init__(self, config: AIMv2Config):
+    def __init__(self, config: PretrainedConfig):
         super().__init__()
         self.proj = nn.Conv2d(
             config.num_channels,
@@ -75,7 +75,7 @@ class AIMv2PatchEmbed(nn.Module):
 
 class AIMv2ViTPreprocessor(nn.Module):
 
-    def __init__(self, config: AIMv2Config):
+    def __init__(self, config: PretrainedConfig):
         super().__init__()
         num_patches = (config.image_size // config.patch_size)**2
 
@@ -93,8 +93,8 @@ class AIMv2ViTPreprocessor(nn.Module):
 
 class AIMv2Attention(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         self.config = config
         self.embed_dim = config.hidden_size
@@ -141,8 +141,8 @@ class AIMv2Attention(nn.Module):
 
 class AIMv2Block(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         self.attn = AIMv2Attention(config,
                                    quant_config=quant_config,
@@ -163,7 +163,7 @@ class AIMv2Transformer(nn.Module):
 
     def __init__(
         self,
-        config: AIMv2Config,
+        config: PretrainedConfig,
         quant_config: QuantizationConfig,
         *,
         require_post_norm: Optional[bool] = None,
@@ -193,7 +193,7 @@ class AIMv2Transformer(nn.Module):
 class AIMv2Model(torch.nn.Module):
 
     def __init__(self,
-                 config: AIMv2Config,
+                 config: PretrainedConfig,
                  quant_config: QuantizationConfig,
                  *,
                  require_post_norm: Optional[bool] = None,
diff --git a/vllm/model_executor/models/dbrx.py b/vllm/model_executor/models/dbrx.py
index 7a4dd69443ad7..360c7e66bf5ce 100644
--- a/vllm/model_executor/models/dbrx.py
+++ b/vllm/model_executor/models/dbrx.py
@@ -6,6 +6,7 @@ from typing import Optional, Union
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.config import CacheConfig, VllmConfig
@@ -24,7 +25,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.dbrx import DbrxConfig
 
 from .interfaces import SupportsPP
 from .utils import (AutoWeightsLoader, is_pp_missing_parameter,
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         params_dtype: Optional[torch.dtype] = None,
     ):
         super().__init__()
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         params_dtype: Optional[torch.dtype] = None,
         prefix: str = "",
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         params_dtype: Optional[torch.dtype] = None,
         prefix: str = "",
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
diff --git a/vllm/model_executor/models/exaone.py b/vllm/model_executor/models/exaone.py
index aaf105ec2552a..8052b6bb82348 100644
--- a/vllm/model_executor/models/exaone.py
+++ b/vllm/model_executor/models/exaone.py
@@ -30,6 +30,7 @@ from typing import Any, Optional, Union
 
 import torch
 from torch import nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -49,7 +50,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.exaone import ExaoneConfig
 
 from .interfaces import SupportsLoRA, SupportsPP
 from .utils import (AutoWeightsLoader, PPMissingLayer, is_pp_missing_parameter,
@@ -99,7 +99,7 @@ class ExaoneAttention(nn.Module):
 
     def __init__(
         self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -194,7 +194,7 @@ class ExaoneBlockAttention(nn.Module):
 
     def __init__(
         self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -236,7 +236,7 @@ class ExaoneDecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
diff --git a/vllm/model_executor/models/exaone4.py b/vllm/model_executor/models/exaone4.py
index 97aeb6fd7b172..3d6ce3e8895fb 100644
--- a/vllm/model_executor/models/exaone4.py
+++ b/vllm/model_executor/models/exaone4.py
@@ -26,6 +26,7 @@ from typing import Any, Optional, Union
 
 import torch
 from torch import nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -45,7 +46,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.exaone4 import Exaone4Config
 
 from .interfaces import SupportsLoRA, SupportsPP
 from .utils import (AutoWeightsLoader, PPMissingLayer, extract_layer_index,
@@ -96,7 +96,7 @@ class Exaone4Attention(nn.Module):
 
     def __init__(
         self,
-        config: Exaone4Config,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -224,7 +224,7 @@ class Exaone4DecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: Exaone4Config,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py
index 36e57b5e4f46a..892d970aaade0 100644
--- a/vllm/model_executor/models/keye.py
+++ b/vllm/model_executor/models/keye.py
@@ -980,9 +980,6 @@ class KeyeMultiModalDataParser(MultiModalDataParser):
 
 class KeyeProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(PretrainedConfig)
-
     def get_hf_processor(
         self,
         *,
diff --git a/vllm/model_executor/models/minimax_vl_01.py b/vllm/model_executor/models/minimax_vl_01.py
index 9aba82cb115ed..62a7d37ec9d33 100644
--- a/vllm/model_executor/models/minimax_vl_01.py
+++ b/vllm/model_executor/models/minimax_vl_01.py
@@ -5,7 +5,7 @@ from typing import Literal, Optional, TypedDict, Union, cast
 
 import torch
 import torch.nn as nn
-from transformers import BatchFeature
+from transformers import BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.jsontree import json_map_leaves
@@ -17,7 +17,6 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import MultiModalFieldConfig
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.minimax_vl_01 import MiniMaxVL01Config
 
 from .clip import CLIPVisionModel
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
@@ -90,8 +89,8 @@ class MiniMaxVL01DummyInputsBuilder(LlavaDummyInputsBuilder):
 
 class MiniMaxVL01ProcessingInfo(LlavaNextProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(MiniMaxVL01Config)
+    def get_hf_config(self):  # Need to override the config type
+        return self.ctx.get_hf_config(PretrainedConfig)
 
     def get_hf_processor(self, **kwargs: object):
         hf_processor = self.ctx.get_hf_processor(**kwargs)
diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py
index 0878ada34d1d8..c243f575ae54a 100644
--- a/vllm/model_executor/models/mpt.py
+++ b/vllm/model_executor/models/mpt.py
@@ -8,6 +8,7 @@ from typing import Optional, Union
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -25,7 +26,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.mpt import MPTConfig
 
 from .interfaces import SupportsPP
 from .utils import (AutoWeightsLoader, is_pp_missing_parameter,
@@ -50,7 +50,7 @@ class MPTAttention(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -144,7 +144,7 @@ class MPTMLP(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -176,7 +176,7 @@ class MPTBlock(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py
index 111628d8d18cb..c8b528048b557 100644
--- a/vllm/model_executor/models/ovis.py
+++ b/vllm/model_executor/models/ovis.py
@@ -25,7 +25,7 @@ import torch
 import torch.nn as nn
 from torch import Tensor
 from torch.nn.functional import gumbel_softmax, pad, softmax
-from transformers import BaseImageProcessor, BatchFeature
+from transformers import BaseImageProcessor, BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.model_executor.layers.linear import ReplicatedLinear
@@ -48,8 +48,6 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
                                         BaseProcessingInfo, PromptReplacement)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.ovis import (BaseVisualTokenizerConfig,
-                                                  OvisConfig)
 from vllm.transformers_utils.processors.ovis import OvisProcessor
 
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
@@ -83,7 +81,7 @@ class VisualTokenizer(torch.nn.Module):
 
     def __init__(
         self,
-        config: BaseVisualTokenizerConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
@@ -107,7 +105,7 @@ class VisualTokenizer(torch.nn.Module):
 
     def _init_backbone(
         self,
-        config: BaseVisualTokenizerConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ) -> nn.Module:
@@ -247,9 +245,6 @@ class VisualEmbedding(torch.nn.Embedding):
 
 class OvisProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(OvisConfig)
-
     def get_hf_processor(self, **kwargs):
         return self.ctx.get_hf_processor(
             OvisProcessor,
@@ -417,7 +412,7 @@ class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
         config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
 
-        self.config: OvisConfig = config
+        self.config: PretrainedConfig = config
         self.llm = init_vllm_registered_model(
             vllm_config=vllm_config.with_hf_config(config.get_text_config()),
             prefix=maybe_prefix(prefix, "llm"),
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 04ff08825bbc5..40a6a9118e53e 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -29,19 +29,13 @@ from vllm import envs
 from vllm.logger import init_logger
 # yapf conflicts with isort for this block
 # yapf: disable
-from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config,
-                                             DbrxConfig, DeepseekVLV2Config,
-                                             EAGLEConfig, Exaone4Config,
-                                             ExaoneConfig, JAISConfig,
+from vllm.transformers_utils.configs import (ChatGLMConfig, DeepseekVLV2Config,
+                                             EAGLEConfig, JAISConfig,
                                              KimiVLConfig, MedusaConfig,
-                                             MiniMaxText01Config,
-                                             MiniMaxVL01Config, MllamaConfig,
-                                             MLPSpeculatorConfig, MPTConfig,
+                                             MllamaConfig, MLPSpeculatorConfig,
                                              Nemotron_Nano_VL_Config,
-                                             NemotronConfig, NVLM_D_Config,
-                                             OvisConfig, RWConfig,
-                                             SkyworkR1VChatConfig, SolarConfig,
-                                             Telechat2Config, UltravoxConfig)
+                                             NemotronConfig, RWConfig,
+                                             UltravoxConfig)
 # yapf: enable
 from vllm.transformers_utils.configs.mistral import adapt_config_dict
 from vllm.transformers_utils.utils import check_gguf_file
@@ -77,28 +71,16 @@ _CONFIG_REGISTRY_OVERRIDE_HF: dict[str, type[PretrainedConfig]] = {
 
 _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
     "chatglm": ChatGLMConfig,
-    "cohere2": Cohere2Config,
-    "dbrx": DbrxConfig,
     "deepseek_vl_v2": DeepseekVLV2Config,
     "kimi_vl": KimiVLConfig,
     "Llama_Nemotron_Nano_VL": Nemotron_Nano_VL_Config,
-    "mpt": MPTConfig,
     "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
     "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
     "jais": JAISConfig,
     "mlp_speculator": MLPSpeculatorConfig,
     "medusa": MedusaConfig,
     "eagle": EAGLEConfig,
-    "exaone": ExaoneConfig,
-    "exaone4": Exaone4Config,
-    "minimax_text_01": MiniMaxText01Config,
-    "minimax_vl_01": MiniMaxVL01Config,
     "nemotron": NemotronConfig,
-    "NVLM_D": NVLM_D_Config,
-    "ovis": OvisConfig,
-    "solar": SolarConfig,
-    "skywork_chat": SkyworkR1VChatConfig,
-    "telechat": Telechat2Config,
     "ultravox": UltravoxConfig,
     **_CONFIG_REGISTRY_OVERRIDE_HF
 }
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 89303213a27e1..0fcb2beb8c7db 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -1,13 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Model configs may be defined in this directory for the following reasons:
+
+- There is no configuration file defined by HF Hub or Transformers library.
+- There is a need to override the existing config to support vLLM.
+"""
 
 from vllm.transformers_utils.configs.chatglm import ChatGLMConfig
-from vllm.transformers_utils.configs.cohere2 import Cohere2Config
-from vllm.transformers_utils.configs.dbrx import DbrxConfig
 from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
 from vllm.transformers_utils.configs.eagle import EAGLEConfig
-from vllm.transformers_utils.configs.exaone import ExaoneConfig
-from vllm.transformers_utils.configs.exaone4 import Exaone4Config
 # RWConfig is for the original tiiuae/falcon-40b(-instruct) and
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
@@ -15,36 +17,21 @@ from vllm.transformers_utils.configs.falcon import RWConfig
 from vllm.transformers_utils.configs.jais import JAISConfig
 from vllm.transformers_utils.configs.kimi_vl import KimiVLConfig
 from vllm.transformers_utils.configs.medusa import MedusaConfig
-from vllm.transformers_utils.configs.minimax_text_01 import MiniMaxText01Config
-from vllm.transformers_utils.configs.minimax_vl_01 import MiniMaxVL01Config
 from vllm.transformers_utils.configs.mllama import MllamaConfig
 from vllm.transformers_utils.configs.mlp_speculator import MLPSpeculatorConfig
 from vllm.transformers_utils.configs.moonvit import MoonViTConfig
-from vllm.transformers_utils.configs.mpt import MPTConfig
 from vllm.transformers_utils.configs.nemotron import NemotronConfig
 from vllm.transformers_utils.configs.nemotron_h import NemotronHConfig
 from vllm.transformers_utils.configs.nemotron_vl import Nemotron_Nano_VL_Config
-from vllm.transformers_utils.configs.nvlm_d import NVLM_D_Config
-from vllm.transformers_utils.configs.ovis import OvisConfig
-from vllm.transformers_utils.configs.skyworkr1v import SkyworkR1VChatConfig
-from vllm.transformers_utils.configs.solar import SolarConfig
-from vllm.transformers_utils.configs.telechat2 import Telechat2Config
 from vllm.transformers_utils.configs.ultravox import UltravoxConfig
 
 __all__ = [
     "ChatGLMConfig",
-    "Cohere2Config",
-    "DbrxConfig",
     "DeepseekVLV2Config",
-    "MPTConfig",
     "RWConfig",
     "JAISConfig",
     "MedusaConfig",
     "EAGLEConfig",
-    "ExaoneConfig",
-    "Exaone4Config",
-    "MiniMaxText01Config",
-    "MiniMaxVL01Config",
     "MllamaConfig",
     "MLPSpeculatorConfig",
     "MoonViTConfig",
@@ -52,10 +39,5 @@ __all__ = [
     "NemotronConfig",
     "NemotronHConfig",
     "Nemotron_Nano_VL_Config",
-    "NVLM_D_Config",
-    "OvisConfig",
-    "SkyworkR1VChatConfig",
-    "SolarConfig",
-    "Telechat2Config",
     "UltravoxConfig",
 ]
diff --git a/vllm/transformers_utils/configs/cohere2.py b/vllm/transformers_utils/configs/cohere2.py
deleted file mode 100644
index e547a9c281cff..0000000000000
--- a/vllm/transformers_utils/configs/cohere2.py
+++ /dev/null
@@ -1,195 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# ruff: noqa
-
-# Adapted from
-# https://github.com/huggingface/transformers/blob/main/src/transformers/models/cohere2/configuration_cohere2.py
-from transformers import PretrainedConfig
-from transformers.modeling_rope_utils import rope_config_validation
-
-
-class Cohere2Config(PretrainedConfig):
-    r"""
-    This is the configuration class to store the configuration of a [`CohereModel`]. It is used to instantiate an Cohere
-    model according to the specified arguments, defining the model architecture.
-
-    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
-    documentation from [`PretrainedConfig`] for more information. Instantiating a configuration
-    with the defaults will yield a similar configuration to that of the [CohereForAI/c4ai-command-r-v01](https://huggingface.co/CohereForAI/c4ai-command-r-v01) model.
-
-
-    Args:
-        vocab_size (`int`, *optional*, defaults to 256000):
-            Vocabulary size of the Cohere model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`CohereModel`]
-        hidden_size (`int`, *optional*, defaults to 8192):
-            Dimension of the hidden representations.
-        intermediate_size (`int`, *optional*, defaults to 22528):
-            Dimension of the MLP representations.
-        logit_scale (`float`, *optional*, defaults to 0.0625):
-            The scaling factor for the output logits.
-        num_hidden_layers (`int`, *optional*, defaults to 40):
-            Number of hidden layers in the Transformer decoder.
-        num_attention_heads (`int`, *optional*, defaults to 64):
-            Number of attention heads for each attention layer in the Transformer decoder.
-        num_key_value_heads (`int`, *optional*):
-            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
-            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
-            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
-            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-            by meanpooling all the original heads within that group. For more details checkout [this
-            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
-            `num_attention_heads`.
-        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
-            The non-linear activation function (function or string) in the decoder.
-        max_position_embeddings (`int`, *optional*, defaults to 8192):
-            The maximum sequence length that this model might ever be used with.
-        initializer_range (`float`, *optional*, defaults to 0.02):
-            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-        layer_norm_eps (`float`, *optional*, defaults to 1e-05):
-            The epsilon used by the layer normalization.
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return the last key/values attentions (not used by all models). Only
-            relevant if `config.is_decoder=True`.
-        pad_token_id (`int`, *optional*, defaults to 0):
-            Padding token id.
-        bos_token_id (`int`, *optional*, defaults to 5):
-            Beginning of stream token id.
-        eos_token_id (`int`, *optional*, defaults to 255001):
-            End of stream token id.
-        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
-            Whether to tie weight embeddings
-        rope_theta (`float`, *optional*, defaults to 10000.0):
-            The base period of the RoPE embeddings.
-        rope_scaling (`dict`, *optional*):
-            Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
-            and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
-            accordingly.
-            Expected contents:
-                `rope_type` (`str`):
-                    The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
-                    'llama3'], with 'default' being the original RoPE implementation.
-                `factor` (`float`, *optional*):
-                    Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
-                    most scaling types, a `factor` of x will enable the model to handle sequences of length x *
-                    original maximum pre-trained length.
-                `original_max_position_embeddings` (`int`, *optional*):
-                    Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
-                    pretraining.
-                `attention_factor` (`float`, *optional*):
-                    Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
-                    computation. If unspecified, it defaults to value recommended by the implementation, using the
-                    `factor` field to infer the suggested value.
-                `beta_fast` (`float`, *optional*):
-                    Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
-                    ramp function. If unspecified, it defaults to 32.
-                `beta_slow` (`float`, *optional*):
-                    Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
-                    ramp function. If unspecified, it defaults to 1.
-                `short_factor` (`list[float]`, *optional*):
-                    Only used with 'longrope'. The scaling factor to be applied to short contexts (<
-                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
-                    size divided by the number of attention heads divided by 2
-                `long_factor` (`list[float]`, *optional*):
-                    Only used with 'longrope'. The scaling factor to be applied to long contexts (<
-                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
-                    size divided by the number of attention heads divided by 2
-                `low_freq_factor` (`float`, *optional*):
-                    Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE
-                `high_freq_factor` (`float`, *optional*):
-                    Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE
-        attention_bias (`bool`, defaults to `False`, *optional*, defaults to `False`):
-            Whether to use a bias in the query, key, value and output projection layers during self-attention.
-        attention_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for the attention probabilities.
-        sliding_window (`int`, *optional*, defaults to 4096):
-            Size of the sliding window attention context.
-        sliding_window_pattern (`int`, *optional*, defaults to 4):
-            Pattern for the sliding window attention.
-        cache_implementation (`str`, *optional*, defaults to `"hybrid"`): the cache type to be used with `generate`.
-
-    ```python
-    >>> from transformers import Cohere2Model, Cohere2Config
-
-    >>> # Initializing a Cohere Nextmodel configuration
-    >>> configuration = Cohere2Config()
-
-    >>> # Initializing a model from the Cohere2 configuration
-    >>> model = Cohere2Model(configuration) # doctest: +SKIP
-
-    >>> # Accessing the model configuration
-    >>> configuration = model.config # doctest: +SKIP
-    ```
-    """
-
-    model_type = "cohere2"
-    keys_to_ignore_at_inference = ["past_key_values"]
-
-    def __init__(
-        self,
-        vocab_size=256000,
-        hidden_size=8192,
-        intermediate_size=22528,
-        logit_scale=0.0625,
-        num_hidden_layers=40,
-        num_attention_heads=64,
-        num_key_value_heads=None,
-        hidden_act="silu",
-        max_position_embeddings=8192,
-        initializer_range=0.02,
-        layer_norm_eps=1e-5,
-        use_cache=True,
-        pad_token_id=0,
-        bos_token_id=5,
-        eos_token_id=255001,
-        tie_word_embeddings=True,
-        rope_theta=10000.0,
-        rope_scaling=None,
-        attention_bias=False,
-        attention_dropout=0.0,
-        sliding_window=4096,
-        sliding_window_pattern=4,
-        cache_implementation="hybrid",
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        self.hidden_size = hidden_size
-        self.logit_scale = logit_scale
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-
-        # for backward compatibility
-        if num_key_value_heads is None:
-            num_key_value_heads = num_attention_heads
-
-        self.num_key_value_heads = num_key_value_heads
-        self.hidden_act = hidden_act
-        self.initializer_range = initializer_range
-        self.layer_norm_eps = layer_norm_eps
-        self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.rope_scaling = rope_scaling
-        self.attention_bias = attention_bias
-        self.attention_dropout = attention_dropout
-        self.sliding_window = sliding_window
-        self.sliding_window_pattern = sliding_window_pattern
-        # Need to specify head_dim in the config so it can be used in the attention forward functions
-        self.head_dim = hidden_size // num_attention_heads
-        self.cache_implementation = cache_implementation
-
-        # Validate the correctness of rotary position embeddings parameters
-        rope_config_validation(self)
-
-        super().__init__(
-            pad_token_id=pad_token_id,
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            tie_word_embeddings=tie_word_embeddings,
-            **kwargs,
-        )
-
-
-__all__ = ["Cohere2Config"]
diff --git a/vllm/transformers_utils/configs/dbrx.py b/vllm/transformers_utils/configs/dbrx.py
deleted file mode 100644
index 7dbda99f85a4e..0000000000000
--- a/vllm/transformers_utils/configs/dbrx.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# yapf: disable
-# ruff: noqa: E501
-# coding=utf-8
-# Copied from
-# https://huggingface.co/databricks/dbrx-base/blob/main/configuration_dbrx.py
-"""Dbrx configuration."""
-
-from typing import Any, Optional
-
-from transformers.configuration_utils import PretrainedConfig
-from transformers.utils import logging
-
-logger = logging.get_logger(__name__)
-
-DBRX_PRETRAINED_CONFIG_ARCHIVE_MAP = {} # type: ignore
-
-
-class DbrxAttentionConfig(PretrainedConfig):
-    """Configuration class for Dbrx Attention.
-
-    [`DbrxAttention`] class. It is used to instantiate attention layers
-    according to the specified arguments, defining the layers architecture.
-
-    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
-    documentation from [`PretrainedConfig`] for more information.
-
-    Args:
-        attn_pdrop (`float`, *optional*, defaults to 0.0):
-            The dropout probability for the attention layers.
-        clip_qkv (`float`, *optional*, defaults to None):
-            If not `None`, clip the queries, keys, and values in the attention layer to this value.
-        kv_n_heads (Optional[int]): For grouped_query_attention only, allow user to specify number of kv heads.
-        rope_theta (float): The base frequency for rope.
-    """
-
-    def __init__(
-        self,
-        attn_pdrop: float = 0,
-        clip_qkv: Optional[float] = None,
-        kv_n_heads: int = 1,
-        rope_theta: float = 10000.0,
-        **kwargs: Any,
-    ):
-        super().__init__(**kwargs)
-        self.attn_pdrop = attn_pdrop
-        self.clip_qkv = clip_qkv
-        self.kv_n_heads = kv_n_heads
-        self.rope_theta = rope_theta
-
-        for k in ["model_type"]:
-            if k in kwargs:
-                kwargs.pop(k)
-        if len(kwargs) != 0:
-            raise ValueError(f"Found unknown {kwargs=}")
-
-    @classmethod
-    def from_pretrained(
-        cls, pretrained_model_name_or_path: str, **kwargs: Any
-    ) -> "PretrainedConfig":
-        cls._set_token_in_kwargs(kwargs)
-
-        config_dict, kwargs = cls.get_config_dict(
-            pretrained_model_name_or_path, **kwargs
-        )
-
-        if config_dict.get("model_type") == "dbrx":
-            config_dict = config_dict["attn_config"]
-
-        if (
-            "model_type" in config_dict
-            and hasattr(cls, "model_type")
-            and config_dict["model_type"] != cls.model_type
-        ):
-            logger.warning(
-                "You are using a model of type %s to instantiate a model of "
-                "type %s. This is not supported for all configurations of "
-                "models and can yield errors.",
-                config_dict["model_type"], cls.model_type)
-
-        return cls.from_dict(config_dict, **kwargs)
-
-
-class DbrxFFNConfig(PretrainedConfig):
-    """Configuration class for Dbrx FFN.
-
-    [`DbrxFFN`] class. It is used to instantiate feedforward layers according to
-    the specified arguments, defining the layers architecture.
-
-    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
-    documentation from [`PretrainedConfig`] for more information.
-
-    Args:
-        ffn_act_fn (dict, optional): A dict specifying activation function for the FFN.
-            The dict should have a key 'name' with the value being the name of
-            the activation function along with any additional keyword arguments.
-        ffn_hidden_size (int, optional): The hidden size of the feedforward network.
-        moe_num_experts (int, optional): The number of experts in the mixture of experts layer.
-        moe_top_k (int, optional): The number of experts to use in the mixture of experts layer.
-        moe_jitter_eps (float, optional): The jitter epsilon for the mixture of experts layer.
-        moe_loss_weight (float, optional): The loss weight for the mixture of experts layer.
-        moe_normalize_expert_weights (float, optional): The normalization factor for the expert weights.
-        uniform_expert_assignment (bool, optional): Whether to use uniform expert assignment.
-            This should only be used for benchmarking purposes.
-    """
-
-    def __init__(
-        self,
-        ffn_act_fn: Optional[dict] = None,
-        ffn_hidden_size: int = 3584,
-        moe_num_experts: int = 4,
-        moe_top_k: int = 1,
-        moe_jitter_eps: Optional[float] = None,
-        moe_loss_weight: float = 0.01,
-        moe_normalize_expert_weights: Optional[float] = 1,
-        uniform_expert_assignment: bool = False,
-        **kwargs: Any,
-    ):
-        super().__init__()
-        if ffn_act_fn is None:
-            ffn_act_fn = {"name": "silu"}
-        self.ffn_act_fn = ffn_act_fn
-        self.ffn_hidden_size = ffn_hidden_size
-        self.moe_num_experts = moe_num_experts
-        self.moe_top_k = moe_top_k
-        self.moe_jitter_eps = moe_jitter_eps
-        self.moe_loss_weight = moe_loss_weight
-        self.moe_normalize_expert_weights = moe_normalize_expert_weights
-        self.uniform_expert_assignment = uniform_expert_assignment
-
-        for k in ["model_type"]:
-            if k in kwargs:
-                kwargs.pop(k)
-        if len(kwargs) != 0:
-            raise ValueError(f"Found unknown {kwargs=}")
-
-    @classmethod
-    def from_pretrained(
-        cls, pretrained_model_name_or_path: str, **kwargs: Any
-    ) -> "PretrainedConfig":
-        cls._set_token_in_kwargs(kwargs)
-
-        config_dict, kwargs = cls.get_config_dict(
-            pretrained_model_name_or_path, **kwargs
-        )
-
-        if config_dict.get("model_type") == "dbrx":
-            config_dict = config_dict["ffn_config"]
-
-        if (
-            "model_type" in config_dict
-            and hasattr(cls, "model_type")
-            and config_dict["model_type"] != cls.model_type
-        ):
-            logger.warning(
-                "You are using a model of type %s to instantiate a model of "
-                "type %s. This is not supported for all "
-                "configurations of models and can yield errors.", config_dict["model_type"], cls.model_type)
-
-        return cls.from_dict(config_dict, **kwargs)
-
-
-class DbrxConfig(PretrainedConfig):
-    """Configuration class for Dbrx.
-
-    [`DbrxModel`]. It is used to instantiate a Dbrx model according to the
-    specified arguments, defining the model architecture.
-
-    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
-    documentation from [`PretrainedConfig`] for more information.
-
-
-    Args:
-        d_model (`int`, *optional*, defaults to 6144):
-            Dimensionality of the embeddings and hidden states.
-        n_heads (`int`, *optional*, defaults to 48):
-            Number of attention heads for each attention layer in the Transformer encoder.
-        n_layers (`int`, *optional*, defaults to 40):
-            Number of hidden layers in the Transformer encoder.
-        max_seq_len (`int`, *optional*, defaults to 32768):
-            The maximum sequence length of the model.
-        vocab_size (`int`, *optional*, defaults to 100352):
-            Vocabulary size of the Dbrx model. Defines the maximum number of different tokens that can be represented by
-            the `inputs_ids` passed when calling [`DbrxModel`].
-        resid_pdrop (`float`, *optional*, defaults to 0.0):
-            The dropout probability applied to the attention output before combining with residual.
-        emb_pdrop (`float`, *optional*, defaults to 0.0):
-            The dropout probability for the embedding layer.
-        attn_config (`dict`, *optional*):
-            A dictionary used to configure the model's attention module.
-        ffn_config (`dict`, *optional*):
-            A dictionary used to configure the model's FFN module.
-        use_cache (`bool`, *optional*, defaults to `False`):
-            Whether or not the model should return the last key/values attentions (not used by all models).
-        initializer_range (`float`, *optional*, defaults to 0.02):
-            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-        output_router_logits (`bool`, *optional*, defaults to `False`):
-            Whether or not the router logits should be returned by the model. Enabling this will also allow the model to output the auxiliary loss.
-        router_aux_loss_coef (`float`, *optional*, defaults to 0.001):
-            The aux loss factor for the total loss.
-
-
-    Example:
-    ```python
-    >>> from transformers import DbrxConfig, DbrxModel
-
-    >>> # Initializing a Dbrx configuration
-    >>> configuration = DbrxConfig()
-
-    >>> # Initializing a model (with random weights) from the configuration
-    >>> model = DbrxModel(configuration)
-
-    >>> # Accessing the model configuration
-    >>> configuration = model.config
-    ```
-    """
-
-    model_type = "dbrx"
-    attribute_map = {
-        "num_attention_heads": "n_heads",
-        "hidden_size": "d_model",
-        "num_hidden_layers": "n_layers",
-        "max_position_embeddings": "max_seq_len",
-    }
-
-    def __init__(
-        self,
-        d_model: int = 2048,
-        n_heads: int = 16,
-        n_layers: int = 24,
-        max_seq_len: int = 2048,
-        vocab_size: int = 32000,
-        resid_pdrop: float = 0.0,
-        emb_pdrop: float = 0.0,
-        attn_config: Optional[DbrxAttentionConfig] = None,
-        ffn_config: Optional[DbrxFFNConfig] = None,
-        use_cache: bool = True,
-        initializer_range: float = 0.02,
-        output_router_logits: bool = False,
-        router_aux_loss_coef: float = 0.05,
-        **kwargs: Any,
-    ):
-        if attn_config is None:
-            self.attn_config = DbrxAttentionConfig()
-        elif isinstance(attn_config, dict):
-            self.attn_config = DbrxAttentionConfig(**attn_config)
-        else:
-            self.attn_config = attn_config
-
-        if ffn_config is None:
-            self.ffn_config = DbrxFFNConfig()
-        elif isinstance(ffn_config, dict):
-            self.ffn_config = DbrxFFNConfig(**ffn_config)
-        else:
-            self.ffn_config = ffn_config
-
-        self.d_model = d_model
-        self.n_heads = n_heads
-        self.n_layers = n_layers
-        self.max_seq_len = max_seq_len
-        self.vocab_size = vocab_size
-        self.resid_pdrop = resid_pdrop
-        self.emb_pdrop = emb_pdrop
-        self.use_cache = use_cache
-        self.initializer_range = initializer_range
-        self.output_router_logits = output_router_logits
-        self.router_aux_loss_coef = router_aux_loss_coef
-
-        tie_word_embeddings = kwargs.pop("tie_word_embeddings", False)
-        if tie_word_embeddings:
-            raise ValueError(
-                "tie_word_embeddings is not supported for Dbrx models."
-            )
-
-        super().__init__(
-            tie_word_embeddings=tie_word_embeddings,
-            **kwargs,
-        )
diff --git a/vllm/transformers_utils/configs/exaone.py b/vllm/transformers_utils/configs/exaone.py
deleted file mode 100644
index 7450904a15caf..0000000000000
--- a/vllm/transformers_utils/configs/exaone.py
+++ /dev/null
@@ -1,190 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# Copied from
-# https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/configuration_exaone.py
-# Copyright 2021 The LG AI Research EXAONE Lab. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Exaone model configuration"""
-
-from transformers.configuration_utils import PretrainedConfig
-from transformers.utils import logging
-
-logger = logging.get_logger(__name__)
-
-EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP: dict[str, str] = {}
-
-
-class ExaoneConfig(PretrainedConfig):
-    r"""
-    This is the configuration class to store the configuration of a :class:
-    `~transformers.ExaoneModel`. It is used to instantiate a GPT Lingvo model
-    according to the specified arguments, defining the model architecture.
-    Instantiating a configuration with the defaults will yield a similar
-    configuration to that of the Exaone
-
-    Configuration objects inherit from {class}`~transformers.PretrainedConfig`
-    and can be used to control the model outputs. Read the documentation from :
-    class:`~transformers.PretrainedConfig` for more information.
-
-    Args:
-        vocab_size ({obj}`int`, `optional`, defaults to 50257):
-            Vocabulary size of the GPT Lingvo model. Defines the number of
-            different tokens that can be represented by the {obj}`inputs_ids`
-            passed when calling {class}`~transformers.ExaoneModel`. Vocabulary
-            size of the model.
-            Defines the different tokens that can be represented by the
-            `inputs_ids` passed to the forward method of :class:
-            `~transformers.EXAONEModel`.
-        hidden_size ({obj}`int`, `optional`, defaults to 2048):
-            Dimensionality of the encoder layers and the pooler layer.
-        num_layers ({obj}`int`, `optional`, defaults to 24):
-            Number of hidden layers in the Transformer encoder.
-        num_attention_heads (`int`, *optional*, defaults to 32):
-            Number of attention heads for each attention layer in the
-            Transformer decoder.
-        num_key_value_heads (`int`, *optional*):
-            This is the number of key_value heads that should be used to
-            implement Grouped Query Attention. If
-            `num_key_value_heads=num_attention_heads`, the model will use Multi
-            Head Attention (MHA), if `num_key_value_heads=1 the model will use
-            Multi Query Attention (MQA) otherwise GQA is used. When
-            converting a multi-head checkpoint to a GQA checkpoint,
-            each group key and value head should be constructed by meanpooling
-            all the original heads within that group. For more details checkout
-            [this paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not
-            specified, will default to `num_attention_heads`.
-        rotary_pct (`float`, *optional*, defaults to 0.25):
-            percentage of hidden dimensions to allocate to rotary embeddings
-        intermediate_size ({obj}`int`, `optional`, defaults to 8192):
-            Dimensionality of the "intermediate" (i.e., feed-forward) layer in
-            the Transformer encoder.
-        activation_function ({obj}`str` or {obj}`function`, `optional`,
-        defaults to {obj}`"gelu_new"`):
-            The non-linear activation function (function or string) in the
-            encoder and pooler. If string, {obj}`"gelu"`, {obj}`"relu"`,
-            {obj}`"selu"` and {obj}`"gelu_new"` are supported.
-        embed_dropout ({obj}`float`, `optional`, defaults to 0.0):
-            The dropout probabilitiy for all fully connected layers in the
-            embeddings, encoder, and pooler.
-        attention_dropout ({obj}`float`, `optional`, defaults to 0.0):
-            The dropout ratio for the attention probabilities.
-        max_position_embeddings ({obj}`int`, `optional`, defaults to 2048):
-            The maximum sequence length that this model might ever be used with.
-            Typically set this to something large just in case
-            (e.g., 512 or 1024 or 2048).
-        type_vocab_size ({obj}`int`, `optional`, defaults to 2):
-            The vocabulary size of the {obj}`token_type_ids` passed when calling
-            {class}`~transformers.EXAONEModel`.
-        initializer_range ({obj}`float`, `optional`, defaults to 0.02):
-            The standard deviation of the truncated_normal_initializer for
-            initializing all weight matrices.
-        layer_norm_epsilon ({obj}`float`, `optional`, defaults to 1e-5):
-            The epsilon used by the layer normalization layers.
-        use_cache ({obj}`bool`, `optional`, defaults to {obj}`True`):
-            Whether or not the model should return the last key/values
-            attentions (not used by all models).
-            Only relevant if ``config.is_decoder=True``.
-        gradient_checkpointing ({obj}`bool`, `optional`,
-        defaults to {obj}`False`):
-            If True, use gradient checkpointing to save memory at the expense
-            of slower backward pass.
-        Example::
-
-            >>> from transformers import ExoneModel, ExaoneConfig
-
-            >>> # Initializing a EXAONE configuration
-            >>> configuration = ExaoneConfig()
-
-            >>> # Initializing a model from configuration
-            >>> model = ExoneModel(configuration)
-
-            >>> # Accessing the model configuration
-            >>> configuration = model.config
-    """
-
-    model_type = "exaone"
-    keys_to_ignore_at_inference = ["past_key_values"]
-    attribute_map = {"num_hidden_layers": "num_layers"}
-
-    def __init__(
-        self,
-        vocab_size=102400,
-        max_position_embeddings=2048,
-        hidden_size=2048,
-        num_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=None,
-        intermediate_size=None,
-        activation_function="silu",
-        rotary_pct=0.25,
-        resid_dropout=0.0,
-        embed_dropout=0.0,
-        attention_dropout=0.0,
-        layer_norm_epsilon=1e-6,
-        initializer_range=0.02,
-        use_cache=True,
-        bos_token_id=0,
-        eos_token_id=2,
-        tie_word_embeddings=True,
-        **kwargs,
-    ):
-        super().__init__(
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            tie_word_embeddings=tie_word_embeddings,
-            **kwargs,
-        )
-
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        self.hidden_size = hidden_size
-        self.num_layers = num_layers
-        self.num_attention_heads = num_attention_heads
-        self.num_hidden_layers = num_layers
-        if num_key_value_heads is None:
-            num_key_value_heads = num_attention_heads
-        self.num_key_value_heads = num_key_value_heads
-        if intermediate_size:
-            self.intermediate_size = intermediate_size
-        else:
-            self.intermediate_size = hidden_size * 4
-        self.activation_function = activation_function
-        self.resid_dropout = resid_dropout
-        self.embed_dropout = embed_dropout
-        self.attention_dropout = attention_dropout
-        self.layer_norm_epsilon = layer_norm_epsilon
-        self.initializer_range = initializer_range
-        self.use_cache = use_cache
-        self.rotary_pct = rotary_pct
-
-        self.bos_token_id = bos_token_id
-        self.eos_token_id = eos_token_id
-
-        self.use_logit_cap = kwargs.pop("use_logit_cap", False)
-        self.ln_no_scale = kwargs.pop("ln_no_scale", False)
-        self.use_gated = kwargs.pop("use_gated", False)
-        self.use_emb_norm = kwargs.pop("use_emb_norm", False)
-        self.use_rotary_pos = kwargs.pop("use_rotary_pos", False)
-        self.rotary_type = kwargs.pop("rotary_type", None)
-        self.scaling_factor = kwargs.pop("scaling_factor", 1)
-        self.use_absolute_pos = kwargs.pop("use_absolute_pos", True)
-        self.use_extra_logit = kwargs.pop("use_extra_logit", True)
-        self.rotary_expand_length = kwargs.pop("rotary_expand_length", None)
-        self.rotary_base = kwargs.pop("rotary_base", 10000.0)
-        self.use_qkv_fuse = kwargs.pop("use_qkv_fuse", False)
-        self.rescale_before_lm_head = kwargs.pop("rescale_before_lm_head",
-                                                 (rotary_pct == 0.25))
-        if self.use_rotary_pos:
-            self.use_absolute_pos = False
diff --git a/vllm/transformers_utils/configs/exaone4.py b/vllm/transformers_utils/configs/exaone4.py
deleted file mode 100644
index a22ebaa6bd6bb..0000000000000
--- a/vllm/transformers_utils/configs/exaone4.py
+++ /dev/null
@@ -1,252 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# ruff: noqa: E501
-
-# Copied from
-# https://github.com/lgai-exaone/transformers/blob/add-exaone4/src/transformers/models/exaone4/configuration_exaone4.py
-# Copyright 2025 The LG CNS Gen AI Solution Delivery Team.
-# Copyright 2025 The LG AI Research and HuggingFace Inc. team. All rights reserved.
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from transformers.configuration_utils import (PretrainedConfig,
-                                              layer_type_validation)
-from transformers.utils import logging
-
-logger = logging.get_logger(__name__)
-
-
-def check_is_sliding(config, layer_idx):
-    """
-    Check if the current layer is a sliding window attention (local attention) layer.
-    """
-    if config.sliding_window is None:
-        return False
-    if config.layer_types is not None:
-        return config.layer_types[layer_idx] == "sliding_attention"
-    if isinstance(config.sliding_window_pattern, int):
-        return ((layer_idx + 1) % config.sliding_window_pattern) != 0
-    elif isinstance(config.sliding_window_pattern, str):
-        assert isinstance(config.sliding_window, int), (
-            f"Sliding window must be positive integer, but got {config.sliding_window}"
-        )
-        return (layer_idx != config.num_hidden_layers - 1
-                and config.sliding_window_pattern[layer_idx % len(
-                    config.sliding_window_pattern)] == "L")
-    else:
-        logger.warning_once(
-            "Sliding window is set, but none of `sliding_window_pattern` or `layer_types` is set. "
-            "Defaulting to use 'full_attention' for all layers.")
-    return False
-
-
-class Exaone4Config(PretrainedConfig):
-    r"""
-    This is the configuration class to store the configuration of a [`Exaone4Model`]. It is used to
-    instantiate a EXAONE 4.0 model according to the specified arguments, defining the model architecture. Instantiating a
-    configuration with the defaults will yield a similar configuration to that of the EXAONE-4.0-Instruct [LGAI-EXAONE/EXAONE-4.0-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-Instruct)
-    NOTE: `EXAONE-4.0-Instruct` is a placeholder model ID. The exact model ID will be updated in the future.
-
-    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
-    outputs. Read the documentation from [`PretrainedConfig`] for more information.
-
-    Args:
-        vocab_size (`int`, *optional*, defaults to 102400):
-            Vocabulary size of the EXAONE 4.0 model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`Exaone4Model`].
-        hidden_size (`int`, *optional*, defaults to 4096):
-            Dimension of the hidden representations.
-        intermediate_size (`int`, *optional*, defaults to `hidden_size * 4`):
-            Dimensionality of the MLP representations.
-        num_hidden_layers (`int`, *optional*, defaults to 32):
-            Number of hidden layers in the Transformer encoder.
-        num_attention_heads (`int`, *optional*, defaults to 32):
-            Number of attention heads for each attention layer in the Transformer decoder.
-        num_key_value_heads (`int`, *optional*):
-            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
-            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
-            `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
-            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-            by meanpooling all the original heads within that group. For more details checkout [this
-            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
-            `num_attention_heads`.
-        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
-            The non-linear activation function (function or string) in the decoder.
-        max_position_embeddings (`int`, *optional*, defaults to 2048):
-            The maximum sequence length that this model might ever be used with. Typically set this to something large
-            just in case (e.g., 32768 for EXAONE 3.5).
-        initializer_range (`float`, *optional*, defaults to 0.02):
-            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
-            The epsilon used by the layer normalization layers.
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return the last key/values attentions (not used by all models). Only
-            relevant if ``config.is_decoder=True``.
-        bos_token_id (`int`, *optional*, defaults to 0):
-            Beginning of stream token id.
-        eos_token_id (`int`, *optional*, defaults to 2):
-            End of stream token id.
-        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
-            Whether to tie weight embeddings
-        rope_theta (`float`, *optional*, defaults to 10000.0):
-            The base period of the RoPE embeddings.
-        rope_scaling (`Dict`, *optional*):
-            Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
-            and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
-            accordingly.
-            Expected contents:
-                `rope_type` (`str`):
-                    The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
-                    'llama3'], with 'default' being the original RoPE implementation.
-                `factor` (`float`, *optional*):
-                    Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
-                    most scaling types, a `factor` of x will enable the model to handle sequences of length x *
-                    original maximum pre-trained length.
-                `original_max_position_embeddings` (`int`, *optional*):
-                    Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
-                    pretraining.
-                `attention_factor` (`float`, *optional*):
-                    Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
-                    computation. If unspecified, it defaults to value recommended by the implementation, using the
-                    `factor` field to infer the suggested value.
-                `beta_fast` (`float`, *optional*):
-                    Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
-                    ramp function. If unspecified, it defaults to 32.
-                `beta_slow` (`float`, *optional*):
-                    Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
-                    ramp function. If unspecified, it defaults to 1.
-                `short_factor` (`List[float]`, *optional*):
-                    Only used with 'longrope'. The scaling factor to be applied to short contexts (<
-                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
-                    size divided by the number of attention heads divided by 2
-                `long_factor` (`List[float]`, *optional*):
-                    Only used with 'longrope'. The scaling factor to be applied to long contexts (<
-                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
-                    size divided by the number of attention heads divided by 2
-                `low_freq_factor` (`float`, *optional*):
-                    Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE
-                `high_freq_factor` (`float`, *optional*):
-                    Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE
-        attention_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for the attention probabilities.
-        sliding_window (`int`, *optional*):
-            The size of the sliding window for the sliding window attention.
-        sliding_window_pattern (`str`, *optional*):
-            The pattern to use for sliding window attention. Can be one of:
-                - `None`: No sliding window attention is used
-                - `int`: Every `sliding_window` layers, use global attention, else use local attention.
-                - `str`: A sequence of "L" (local attention) and "G" (global attention) characters that defines the
-                  attention pattern. The pattern starts from layer 0 and repeats every `sliding_window` layers. The
-                  final layer always uses global attention regardless of the pattern.
-            For instance, sliding_window_pattern="LLLG" same as sliding_window=4, which means:
-                - Layer 0, 1, 2: local attention,
-                - Layer 3: global attention,
-                ...(repeated)
-        layer_types (`list`, *optional*):
-            Attention pattern for each layer. Prioritized over `sliding_window_pattern`.
-
-    Example:
-
-    ```python
-    >>> from transformers import Exaone4Model, Exaone4Config
-
-    >>> # Initializing a EXAONE configuration
-    >>> configuration = Exaone4Config()
-
-    >>> # Initializing a model from configuration
-    >>> model = Exaone4Model(configuration)
-
-    >>> # Accessing the model configuration
-    >>> configuration = model.config
-    ```"""
-
-    model_type = "exaone4"
-    keys_to_ignore_at_inference = ["past_key_values"]
-    # Default tensor parallel plan for base model `LlamaModel`
-    base_model_tp_plan = {
-        "layers.*.self_attn.q_proj": "colwise",
-        "layers.*.self_attn.k_proj": "colwise",
-        "layers.*.self_attn.v_proj": "colwise",
-        "layers.*.self_attn.o_proj": "rowwise",
-        "layers.*.mlp.gate_proj": "colwise",
-        "layers.*.mlp.up_proj": "colwise",
-        "layers.*.mlp.down_proj": "rowwise",
-    }
-    base_model_pp_plan = {
-        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
-        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
-        "norm": (["hidden_states"], ["hidden_states"]),
-    }
-
-    def __init__(
-        self,
-        vocab_size=102400,
-        hidden_size=4096,
-        intermediate_size=None,
-        num_hidden_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=None,
-        hidden_act="silu",
-        max_position_embeddings=2048,
-        initializer_range=0.02,
-        rms_norm_eps=1e-5,
-        use_cache=True,
-        bos_token_id=0,
-        eos_token_id=2,
-        tie_word_embeddings=False,
-        rope_theta=10000.0,
-        rope_scaling=None,
-        attention_dropout=0.0,
-        sliding_window=None,
-        sliding_window_pattern=None,
-        layer_types=None,
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.hidden_size = hidden_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        if num_key_value_heads is None:
-            num_key_value_heads = num_attention_heads
-        self.num_key_value_heads = num_key_value_heads
-        if intermediate_size:
-            self.intermediate_size = intermediate_size
-        else:
-            self.intermediate_size = hidden_size * 4
-        self.hidden_act = hidden_act
-        self.max_position_embeddings = max_position_embeddings
-        self.initializer_range = initializer_range
-        self.rms_norm_eps = rms_norm_eps
-        self.use_cache = use_cache
-        self.attention_dropout = attention_dropout
-        self.rope_theta = rope_theta
-        self.rope_scaling = rope_scaling
-        self.sliding_window = sliding_window
-        self.sliding_window_pattern = sliding_window_pattern
-
-        self.layer_types = layer_types
-        if self.layer_types is None:
-            self.layer_types = [
-                "sliding_attention"
-                if check_is_sliding(self, i) else "full_attention"
-                for i in range(self.num_hidden_layers)
-            ]
-        layer_type_validation(self.layer_types)
-
-        super().__init__(bos_token_id=bos_token_id,
-                         eos_token_id=eos_token_id,
-                         tie_word_embeddings=tie_word_embeddings,
-                         **kwargs)
-
-
-__all__ = ["Exaone4Config"]
diff --git a/vllm/transformers_utils/configs/minimax_text_01.py b/vllm/transformers_utils/configs/minimax_text_01.py
deleted file mode 100644
index e3b63dfa00371..0000000000000
--- a/vllm/transformers_utils/configs/minimax_text_01.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-""" MiniMaxText01 model configuration"""
-
-from transformers.configuration_utils import PretrainedConfig
-
-
-class MiniMaxText01Config(PretrainedConfig):
-    model_type = "MiniMaxText01"
-    keys_to_ignore_at_inference = ["past_key_values"]
-
-    def __init__(
-        self,
-        vocab_size=32000,
-        hidden_size=4096,
-        intermediate_size=14336,
-        num_hidden_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=8,
-        hidden_act="silu",
-        max_position_embeddings=4096 * 32,
-        initializer_range=0.02,
-        rms_norm_eps=1e-5,
-        use_cache=True,
-        pad_token_id=None,
-        bos_token_id=None,
-        eos_token_id=None,
-        tie_word_embeddings=False,
-        rope_theta=1e6,
-        sliding_window=None,
-        attention_dropout=0.0,
-        num_experts_per_tok=2,
-        num_local_experts=8,
-        output_router_logits=False,
-        router_aux_loss_coef=0.001,
-        router_jitter_noise=0.0,
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.sliding_window = sliding_window
-
-        # for backward compatibility
-        if num_key_value_heads is None:
-            num_key_value_heads = num_attention_heads
-
-        self.num_key_value_heads = num_key_value_heads
-        self.hidden_act = hidden_act
-        self.initializer_range = initializer_range
-        self.rms_norm_eps = rms_norm_eps
-        self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.attention_dropout = attention_dropout
-
-        self.num_experts_per_tok = num_experts_per_tok
-        self.num_local_experts = num_local_experts
-        self.output_router_logits = output_router_logits
-        self.router_aux_loss_coef = router_aux_loss_coef
-        self.router_jitter_noise = router_jitter_noise
-        super().__init__(
-            pad_token_id=pad_token_id,
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            tie_word_embeddings=tie_word_embeddings,
-            **kwargs,
-        )
diff --git a/vllm/transformers_utils/configs/minimax_vl_01.py b/vllm/transformers_utils/configs/minimax_vl_01.py
deleted file mode 100644
index c62497192cc2a..0000000000000
--- a/vllm/transformers_utils/configs/minimax_vl_01.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""MiniMaxVL01 model configuration"""
-
-from transformers.configuration_utils import PretrainedConfig
-from transformers.models.auto import CONFIG_MAPPING
-
-from .minimax_text_01 import MiniMaxText01Config
-
-
-class MiniMaxVL01Config(PretrainedConfig):
-    model_type = "minimax_vl_01"
-
-    def __init__(
-        self,
-        vision_config=None,
-        text_config=None,
-        ignore_index=-100,
-        image_token_index=32000,
-        projector_hidden_act="gelu",
-        vision_feature_select_strategy="default",
-        vision_feature_layer=-2,
-        image_grid_pinpoints=None,
-        tie_word_embeddings=False,
-        image_seq_length=576,
-        **kwargs,
-    ):
-        self.ignore_index = ignore_index
-        self.image_token_index = image_token_index
-        self.projector_hidden_act = projector_hidden_act
-        self.image_seq_length = image_seq_length
-
-        if vision_feature_select_strategy not in ["default", "full"]:
-            raise ValueError("vision_feature_select_strategy should " +
-                             "be one of 'default', 'full'." +
-                             f"Got: {vision_feature_select_strategy}")
-
-        self.vision_feature_select_strategy = vision_feature_select_strategy
-        self.vision_feature_layer = vision_feature_layer
-        image_grid_pinpoints = (
-            image_grid_pinpoints if image_grid_pinpoints is not None else
-            [[336, 672], [672, 336], [672, 672], [1008, 336], [336, 1008]])
-        self.image_grid_pinpoints = image_grid_pinpoints
-
-        if isinstance(vision_config, dict):
-            if "model_type" not in vision_config:
-                vision_config["model_type"] = "clip_vision_model"
-            vision_config = CONFIG_MAPPING[vision_config["model_type"]](
-                **vision_config)
-        elif vision_config is None:
-            vision_config = CONFIG_MAPPING["clip_vision_model"](
-                intermediate_size=4096,
-                hidden_size=1024,
-                patch_size=14,
-                image_size=336,
-                num_hidden_layers=24,
-                num_attention_heads=16,
-                vocab_size=32000,
-                projection_dim=768,
-            )
-
-        self.vision_config = vision_config
-
-        if text_config is not None:
-            text_config = MiniMaxText01Config(**text_config)
-        else:
-            text_config = MiniMaxText01Config()
-
-        self.text_config = text_config
-
-        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
diff --git a/vllm/transformers_utils/configs/mpt.py b/vllm/transformers_utils/configs/mpt.py
deleted file mode 100644
index 91316408dcd89..0000000000000
--- a/vllm/transformers_utils/configs/mpt.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# Copied from
-# https://huggingface.co/mosaicml/mpt-7b/blob/main/configuration_mpt.py
-"""A HuggingFace-style model configuration."""
-import warnings
-from typing import Any, Optional, Union
-
-from transformers import PretrainedConfig
-
-attn_config_defaults: dict = {
-    'attn_type': 'multihead_attention',
-    'attn_pdrop': 0.0,
-    'attn_impl': 'triton',
-    'qk_ln': False,
-    'clip_qkv': None,
-    'softmax_scale': None,
-    'prefix_lm': False,
-    'attn_uses_sequence_id': False,
-    'alibi': False,
-    'alibi_bias_max': 8
-}
-ffn_config_defaults: dict = {'ffn_type': 'mptmlp'}
-init_config_defaults: dict = {
-    'name': 'kaiming_normal_',
-    'fan_mode': 'fan_in',
-    'init_nonlinearity': 'relu',
-    'init_div_is_residual': True,
-    'emb_init_std': None,
-    'emb_init_uniform_lim': None,
-    'init_std': None,
-    'init_gain': 0.0
-}
-
-
-class MPTConfig(PretrainedConfig):
-    model_type = 'mpt'
-    attribute_map = {
-        'num_attention_heads': 'n_heads',
-        'hidden_size': 'd_model',
-        'num_hidden_layers': 'n_layers',
-    }
-
-    # pylint: disable=dangerous-default-value
-    def __init__(self,
-                 d_model: int = 2048,
-                 n_heads: int = 16,
-                 n_layers: int = 24,
-                 expansion_ratio: int = 4,
-                 max_seq_len: int = 2048,
-                 vocab_size: int = 50368,
-                 resid_pdrop: float = 0.0,
-                 emb_pdrop: float = 0.0,
-                 learned_pos_emb: bool = True,
-                 attn_config: dict = attn_config_defaults,
-                 ffn_config: dict = ffn_config_defaults,
-                 init_device: str = 'cpu',
-                 logit_scale: Optional[Union[float, str]] = None,
-                 no_bias: bool = False,
-                 embedding_fraction: float = 1.0,
-                 norm_type: str = 'low_precision_layernorm',
-                 use_cache: bool = False,
-                 init_config: dict = init_config_defaults,
-                 fc_type: str = 'torch',
-                 verbose: Optional[int] = None,
-                 **kwargs: Any):
-        self.d_model = d_model
-        self.n_heads = n_heads
-        self.n_layers = n_layers
-        self.expansion_ratio = expansion_ratio
-        self.max_seq_len = max_seq_len
-        self.vocab_size = vocab_size
-        self.resid_pdrop = resid_pdrop
-        self.emb_pdrop = emb_pdrop
-        self.learned_pos_emb = learned_pos_emb
-        self.attn_config = attn_config
-        self.ffn_config = ffn_config
-        self.init_device = init_device
-        self.logit_scale = logit_scale
-        self.no_bias = no_bias
-        self.embedding_fraction = embedding_fraction
-        self.norm_type = norm_type
-        self.use_cache = use_cache
-        self.init_config = init_config
-        self.fc_type = fc_type
-        if verbose is not None:
-            warnings.warn(DeprecationWarning(
-                'verbose argument for MPTConfig is now ignored and '
-                'will be removed. Use python_log_level instead.'),
-                          stacklevel=2)
-        if 'name' in kwargs:
-            del kwargs['name']
-        if 'loss_fn' in kwargs:
-            del kwargs['loss_fn']
-        if self.attn_config.get('alibi', False):
-            self.learned_pos_emb = False
-            warnings.warn(
-                f'alibi is turned on, setting `learned_pos_emb` '
-                f'to {self.learned_pos_emb}`',
-                stacklevel=2)
-        super().__init__(**kwargs)
-        self._validate_config()
-
-    def _set_config_defaults(
-            self, config: dict[str, Any],
-            config_defaults: dict[str, Any]) -> dict[str, Any]:
-        for (k, v) in config_defaults.items():
-            if k not in config:
-                config[k] = v
-        return config
-
-    def _validate_config(self) -> None:
-        self.attn_config = self._set_config_defaults(self.attn_config,
-                                                     attn_config_defaults)
-        self.ffn_config = self._set_config_defaults(self.ffn_config,
-                                                    ffn_config_defaults)
-        self.init_config = self._set_config_defaults(self.init_config,
-                                                     init_config_defaults)
-        if self.d_model % self.n_heads != 0:
-            raise ValueError('d_model must be divisible by n_heads')
-        if any(
-                prob < 0 or prob > 1 for prob in
-            [self.attn_config['attn_pdrop'], self.resid_pdrop, self.emb_pdrop
-             ]):
-            raise ValueError(
-                "self.attn_config['attn_pdrop'], resid_pdrop, emb_pdrop are "
-                "probabilities and must be between 0 and 1")
-        if self.attn_config['attn_impl'] not in ['torch', 'flash', 'triton']:
-            raise ValueError(
-                f"Unknown attn_impl={self.attn_config['attn_impl']}")
-        if self.attn_config['prefix_lm'] and self.attn_config[
-                'attn_impl'] not in ['torch', 'triton']:
-            raise NotImplementedError(
-                'prefix_lm only implemented with torch and triton attention.')
-        if self.attn_config['alibi'] and self.attn_config['attn_impl'] not in [
-                'torch', 'triton'
-        ]:
-            raise NotImplementedError(
-                'alibi only implemented with torch and triton attention.')
-        if self.attn_config['attn_uses_sequence_id'] and self.attn_config[
-                'attn_impl'] not in ['torch', 'triton']:
-            raise NotImplementedError(
-                'attn_uses_sequence_id only implemented with torch '
-                'and triton attention.')
-        if self.embedding_fraction > 1 or self.embedding_fraction <= 0:
-            raise ValueError(
-                'model.embedding_fraction must be between 0 (exclusive) '
-                'and 1 (inclusive)!')
-        if isinstance(self.logit_scale,
-                      str) and self.logit_scale != 'inv_sqrt_d_model':
-            raise ValueError(
-                f"self.logit_scale={self.logit_scale!r} is not recognized as "
-                "an option; use numeric value or 'inv_sqrt_d_model'.")
-        if self.init_config.get('name', None) is None:
-            raise ValueError(
-                f"self.init_config={self.init_config!r} 'name' needs to be set."
-            )
-        if not self.learned_pos_emb and (not self.attn_config['alibi']):
-            warnings.warn(
-                'Positional information not being provided to the model.',
-                stacklevel=2)
-        if self.fc_type == 'te' or self.ffn_config['ffn_type'] == 'te_ln_mlp':
-            try:
-                # pylint: disable=import-outside-toplevel
-                import transformer_engine.pytorch as te
-                del te
-            except Exception as exc:
-                raise ImportError(
-                    'TransformerEngine import fail. `fc_type: te` requires '
-                    'TransformerEngine be installed. '
-                    'The required version of transformer_engine also requires '
-                    'FlashAttention v1.0.6 is installed:\n'
-                    'pip install flash-attn==1.0.6 --no-build-isolation \n'
-                    'pip install git+https://github.com/NVIDIA/TransformerEngine.git@144e4888b2cdd60bd52e706d5b7a79cb9c1a7156'
-                ) from exc
-        if self.ffn_config['ffn_type'] == 'mptmlp':
-            self.ffn_config['fc_type'] = self.fc_type
-        elif self.ffn_config['ffn_type'] == 'te_ln_mlp':
-            self.ffn_config['bias'] = not self.no_bias
diff --git a/vllm/transformers_utils/configs/nvlm_d.py b/vllm/transformers_utils/configs/nvlm_d.py
deleted file mode 100644
index edfc506882ff5..0000000000000
--- a/vllm/transformers_utils/configs/nvlm_d.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# Adapted from
-# https://huggingface.co/nvidia/NVLM-D-72B/blob/main/configuration_nvlm_d.py
-# --------------------------------------------------------
-# NVLM-D
-# Copyright (c) 2024 NVIDIA
-# Licensed under Apache 2.0 License [see LICENSE for details]
-# --------------------------------------------------------
-from transformers import Qwen2Config
-from transformers.configuration_utils import PretrainedConfig
-
-
-class NVLM_D_Config(PretrainedConfig):
-    model_type = 'NVLM_D'
-    is_composition = True
-
-    def __init__(self, vision_config=None, llm_config=None, **kwargs):
-        super().__init__(**kwargs)
-
-        # Handle vision_config initialization
-        if vision_config is None:
-            vision_config = {}
-
-        # Handle llm_config initialization
-        if llm_config is None:
-            llm_config = {}
-
-        self.vision_config = PretrainedConfig(**vision_config)
-        self.text_config = Qwen2Config(**llm_config)
diff --git a/vllm/transformers_utils/configs/ovis.py b/vllm/transformers_utils/configs/ovis.py
deleted file mode 100644
index 021d402a71f4c..0000000000000
--- a/vllm/transformers_utils/configs/ovis.py
+++ /dev/null
@@ -1,184 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# yapf: disable
-# ruff: noqa: E501
-# copied from https://huggingface.co/AIDC-AI/Ovis2-1B/blob/main/configuration_aimv2.py
-# and https://huggingface.co/AIDC-AI/Ovis2-1B/blob/main/configuration_ovis.py
-from typing import Any, Optional, Union
-
-from transformers import AutoConfig, PretrainedConfig
-
-
-class AIMv2Config(PretrainedConfig):
-    """This is the configuration class to store the configuration of an [`AIMv2Model`].
-
-    Instantiating a configuration with the defaults will yield a similar configuration
-    to that of the [apple/aimv2-large-patch14-224](https://huggingface.co/apple/aimv2-large-patch14-224).
-
-    Args:
-        hidden_size: Dimension of the hidden representations.
-        intermediate_size: Dimension of the SwiGLU representations.
-        num_hidden_layers: Number of hidden layers in the Transformer.
-        num_attention_heads: Number of attention heads for each attention layer
-            in the Transformer.
-        num_channels: Number of input channels.
-        image_size: Image size.
-        patch_size: Patch size.
-        rms_norm_eps: Epsilon value used for the RMS normalization layer.
-        attention_dropout: Dropout ratio for attention probabilities.
-        projection_dropout: Dropout ratio for the projection layer after the attention.
-        qkv_bias: Whether to add a bias to the queries, keys and values.
-        use_bias: Whether to add a bias in the feed-forward and projection layers.
-        kwargs: Keyword arguments for the [`PretrainedConfig`].
-    """
-
-    model_type: str = "aimv2"
-
-    def __init__(
-        self,
-        hidden_size: int = 1024,
-        intermediate_size: int = 2816,
-        num_hidden_layers: int = 24,
-        num_attention_heads: int = 8,
-        num_channels: int = 3,
-        image_size: int = 224,
-        patch_size: int = 14,
-        rms_norm_eps: float = 1e-5,
-        attention_dropout: float = 0.0,
-        projection_dropout: float = 0.0,
-        qkv_bias: bool = False,
-        use_bias: bool = False,
-        **kwargs: Any,
-    ):
-        super().__init__(**kwargs)
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.num_channels = num_channels
-        self.patch_size = patch_size
-        self.image_size = image_size
-        self.attention_dropout = attention_dropout
-        self.rms_norm_eps = rms_norm_eps
-
-        self.projection_dropout = projection_dropout
-        self.qkv_bias = qkv_bias
-        self.use_bias = use_bias
-
-
-IGNORE_ID = -100
-IMAGE_TOKEN_ID = -200
-IMAGE_TOKEN = "<image>"
-IMAGE_ATOM_ID = -300
-IMAGE_INDICATOR_IDS = [-301, -302, -303, -304, -305]
-
-
-# ----------------------------------------------------------------------
-#                     Visual Tokenizer Configuration
-# ----------------------------------------------------------------------
-class BaseVisualTokenizerConfig(PretrainedConfig):
-
-    def __init__(self,
-                 vocab_size=16384,
-                 tokenize_function="softmax",
-                 tau=1.0,
-                 depths=None,
-                 drop_cls_token=False,
-                 backbone_config: Optional[Union[PretrainedConfig,
-                                                 dict]] = None,
-                 hidden_stride: int = 1,
-                 **kwargs):
-        super().__init__(**kwargs)
-        self.vocab_size = vocab_size
-        self.tokenize_function = tokenize_function
-        self.tau = tau
-        if isinstance(depths, str):
-            depths = [int(x) for x in depths.split('|')]
-        self.depths = depths
-        self.backbone_kwargs = dict[str, Any]()
-        self.drop_cls_token = drop_cls_token
-        if backbone_config is not None:
-            assert isinstance(backbone_config, (PretrainedConfig, dict)), \
-                f"expect `backbone_config` to be instance of PretrainedConfig or dict, but got {type(backbone_config)} type"
-            if not isinstance(backbone_config, PretrainedConfig):
-                model_type = backbone_config['model_type']
-                if model_type != "aimv2":
-                    backbone_config.pop('model_type')
-                    backbone_config = AutoConfig.for_model(model_type, **backbone_config)
-                else:
-                    backbone_config = AIMv2Config(**backbone_config)
-        self.backbone_config = backbone_config
-        self.hidden_stride = hidden_stride
-
-
-class Aimv2VisualTokenizerConfig(BaseVisualTokenizerConfig):
-    model_type = "aimv2_visual_tokenizer"
-
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        if self.drop_cls_token:
-            self.drop_cls_token = False
-        if self.depths:
-            assert len(self.depths) == 1
-            self.backbone_kwargs['num_hidden_layers'] = self.depths[0]
-
-
-class SiglipVisualTokenizerConfig(BaseVisualTokenizerConfig):
-    model_type = "siglip_visual_tokenizer"
-
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        if self.drop_cls_token:
-            self.drop_cls_token = False
-        if self.depths:
-            assert len(self.depths) == 1
-            self.backbone_kwargs['num_hidden_layers'] = self.depths[0]
-
-
-AutoConfig.register("siglip_visual_tokenizer", SiglipVisualTokenizerConfig)
-AutoConfig.register("aimv2_visual_tokenizer", Aimv2VisualTokenizerConfig)
-
-
-# ----------------------------------------------------------------------
-#                           Ovis Configuration
-# ----------------------------------------------------------------------
-class OvisConfig(PretrainedConfig):
-    model_type = "ovis"
-
-    def __init__(self,
-                 llm_config: Optional[Union[PretrainedConfig, dict]] = None,
-                 visual_tokenizer_config: Optional[Union[PretrainedConfig,
-                                                         dict]] = None,
-                 multimodal_max_length=8192,
-                 hidden_size=None,
-                 conversation_formatter_class=None,
-                 llm_attn_implementation=None,
-                 disable_tie_weight=False,
-                 **kwargs):
-        super().__init__(**kwargs)
-        if llm_config is not None:
-            assert isinstance(llm_config, (PretrainedConfig, dict)), \
-                f"expect `llm_config` to be instance of PretrainedConfig or dict, but got {type(llm_config)} type"
-            if not isinstance(llm_config, PretrainedConfig):
-                model_type = llm_config['model_type']
-                llm_config.pop('model_type')
-                llm_config = AutoConfig.for_model(model_type, **llm_config)
-
-        # map llm_config to text_config
-        self.text_config = llm_config
-        if visual_tokenizer_config is not None:
-            assert isinstance(visual_tokenizer_config, (PretrainedConfig, dict)), \
-                f"expect `visual_tokenizer_config` to be instance of PretrainedConfig or dict, but got {type(visual_tokenizer_config)} type"
-            if not isinstance(visual_tokenizer_config, PretrainedConfig):
-                model_type = visual_tokenizer_config['model_type']
-                visual_tokenizer_config.pop('model_type')
-                visual_tokenizer_config = AutoConfig.for_model(
-                    model_type, **visual_tokenizer_config)
-
-        self.visual_tokenizer_config = visual_tokenizer_config
-        self.multimodal_max_length = multimodal_max_length
-        self.hidden_size = hidden_size
-        self.conversation_formatter_class = conversation_formatter_class
-        self.llm_attn_implementation = llm_attn_implementation
-        self.disable_tie_weight = disable_tie_weight
diff --git a/vllm/transformers_utils/configs/skyworkr1v.py b/vllm/transformers_utils/configs/skyworkr1v.py
deleted file mode 100644
index 33a45220e3159..0000000000000
--- a/vllm/transformers_utils/configs/skyworkr1v.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# Adapted from
-# https://huggingface.co/Skywork/Skywork-R1V-38B/blob/main/configuration_skywork_chat.py
-# --------------------------------------------------------
-# SkyworkR1V
-# Copyright (c) 2025 Skywork
-# Licensed under The MIT License [see LICENSE for details]
-# --------------------------------------------------------
-from transformers.configuration_utils import PretrainedConfig
-
-
-class SkyworkR1VChatConfig(PretrainedConfig):
-    model_type = 'internvl_chat'
-    is_composition = True
-
-    def __init__(self,
-                 vision_config=None,
-                 llm_config=None,
-                 use_backbone_lora=0,
-                 use_llm_lora=0,
-                 select_layer=-1,
-                 force_image_size=None,
-                 downsample_ratio=0.5,
-                 template=None,
-                 dynamic_image_size=False,
-                 use_thumbnail=False,
-                 ps_version='v1',
-                 min_dynamic_patch=1,
-                 max_dynamic_patch=6,
-                 **kwargs):
-        super().__init__(**kwargs)
-
-        if vision_config is None:
-            vision_config = {}
-
-        if llm_config is None:
-            llm_config = {}
-
-        self.vision_config = PretrainedConfig(**vision_config)
-        self.text_config = PretrainedConfig(**llm_config)
-
-        self.use_backbone_lora = use_backbone_lora
-        self.use_llm_lora = use_llm_lora
-        self.select_layer = select_layer
-        self.force_image_size = force_image_size
-        self.downsample_ratio = downsample_ratio
-        self.template = template
-        self.dynamic_image_size = dynamic_image_size
-        self.use_thumbnail = use_thumbnail
-        self.ps_version = ps_version  # pixel shuffle version
-        self.min_dynamic_patch = min_dynamic_patch
-        self.max_dynamic_patch = max_dynamic_patch
diff --git a/vllm/transformers_utils/configs/solar.py b/vllm/transformers_utils/configs/solar.py
deleted file mode 100644
index a83dfa40b43a5..0000000000000
--- a/vllm/transformers_utils/configs/solar.py
+++ /dev/null
@@ -1,247 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
-#
-# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
-# and OPT implementations in this library. It has been modified from its
-# original forms to accommodate minor architectural differences compared
-# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Solar model configuration"""
-
-from transformers import PretrainedConfig
-from transformers.utils import logging
-
-logger = logging.get_logger(__name__)
-
-
-class SolarConfig(PretrainedConfig):
-    r"""
-    This is the configuration class to store
-    the configuration of a [`SolarModel`].
-    It is used to instantiate an LLaMA model
-    according to the specified arguments,
-    defining the model architecture.
-    Instantiating a configuration with the
-    defaults will yield a similar
-    configuration to that of the LLaMA-7B.
-    Configuration objects inherit from [`PretrainedConfig`]
-    and can be used to control the model outputs.
-    Read the documentation from [`PretrainedConfig`] for more information.
-    Args:
-        vocab_size (`int`, *optional*, defaults to 32000):
-            Vocabulary size of the LLaMA model.
-            Defines the number of different tokens
-            that can be represented by the `inputs_ids`
-            passed when calling [`SolarModel`]
-        hidden_size (`int`, *optional*, defaults to 4096):
-            Dimension of the hidden representations.
-        intermediate_size (`int`, *optional*, defaults to 11008):
-            Dimension of the MLP representations.
-        num_hidden_layers (`int`, *optional*, defaults to 32):
-            Number of hidden layers in the Transformer decoder.
-        num_attention_heads (`int`, *optional*, defaults to 32):
-            Number of attention heads for each attention layer
-            in the Transformer decoder.
-        num_key_value_heads (`int`, *optional*):
-            This is the number of key_value heads that
-            should be used to implement Grouped Query Attention. If
-            `num_key_value_heads=num_attention_heads`,
-            the model will use Multi Head Attention (MHA), if
-            `num_key_value_heads=1` the model
-            will use Multi Query Attention (MQA)
-            otherwise GQA is used. When
-            converting a multi-head checkpoint to a GQA checkpoint,
-            each group key and value head should be constructed
-            by meanpooling all the original heads within that group.
-            For more details checkout [this paper]
-            (https://arxiv.org/pdf/2305.13245.pdf).
-            If it is not specified, will default to
-            `num_attention_heads`.
-        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
-            The non-linear activation function (function or string)
-            in the decoder.
-        max_position_embeddings (`int`, *optional*, defaults to 2048):
-            The maximum sequence length that this model might ever be used with.
-            Solar 1 supports up to 2048 tokens,
-            Solar 2 up to 4096, CodeSolar up to 16384.
-        initializer_range (`float`, *optional*, defaults to 0.02):
-            The standard deviation of
-            the truncated_normal_initializer for initializing
-            all weight matrices.
-        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
-            The epsilon used by the rms normalization layers.
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return
-            the last key/values attentions (not used by all models). Only
-            relevant if `config.is_decoder=True`.
-        pad_token_id (`int`, *optional*):
-            Padding token id.
-        bos_token_id (`int`, *optional*, defaults to 1):
-            Beginning of stream token id.
-        eos_token_id (`int`, *optional*, defaults to 2):
-            End of stream token id.
-        pretraining_tp (`int`, *optional*, defaults to 1):
-            Experimental feature. Tensor parallelism rank
-            used during pretraining.
-            Please refer to [this
-            document](https://huggingface.co/docs/
-            transformers/main/
-            perf_train_gpu_many#tensor-parallelism)
-             to understand more about it. This value is
-            necessary to ensure exact reproducibility
-            of the pretraining results.
-            Please refer to [this
-            issue](https://github.com/pytorch/pytorch/issues/76232).
-        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
-            Whether to tie weight embeddings
-        rope_theta (`float`, *optional*, defaults to 10000.0):
-            The base period of the RoPE embeddings.
-        rope_scaling (`dict`, *optional*):
-            Dictionary containing the scaling configuration for
-            the RoPE embeddings.
-            Currently supports two scaling
-            strategies: linear and dynamic.
-            Their scaling factor must be a float greater than 1.
-            The expected format is
-            `{"type": strategy name, "factor": scaling factor}`.
-            When using this flag, don't update
-            `max_position_embeddings` to the expected new maximum.
-            See the following thread for more information on how
-            these scaling strategies behave:
-            https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/
-            dynamically_scaled_rope_further_increases/. This is an
-            experimental feature, subject to breaking
-            API changes in future versions.
-        attention_bias (`bool`, *optional*, defaults to `False`):
-            Whether to use a bias in the query, key, value
-            and output projection layers during self-attention.
-        attention_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for the attention probabilities.
-        mlp_bias (`bool`, *optional*, defaults to `False`):
-            Whether to use a bias in up_proj, down_proj and gate_proj
-            layers in the MLP layers.
-        sliding_window (`int`, *optional*, defaults to 2047):
-            Sliding window attention window size. If not specified,
-            will default to `2047`.
-    ```python
-    >>> from transformers import SolarModel, SolarConfig
-    >>> # Initializing a Solar-pro style configuration
-    >>> configuration = SolarConfig()
-    >>> # Initializing a model from the Solar-pro style configuration
-    >>> model = SolarModel(configuration)
-    >>> # Accessing the model configuration
-    >>> configuration = model.config
-    ```"""
-
-    model_type = "solar"
-    keys_to_ignore_at_inference = ["past_key_values"]
-
-    def __init__(
-        self,
-        vocab_size=32000,
-        hidden_size=4096,
-        intermediate_size=11008,
-        num_hidden_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=None,
-        hidden_act="silu",
-        max_position_embeddings=2048,
-        initializer_range=0.02,
-        rms_norm_eps=1e-6,
-        use_cache=True,
-        pad_token_id=None,
-        bos_token_id=1,
-        eos_token_id=2,
-        pretraining_tp=1,
-        tie_word_embeddings=False,
-        rope_theta=10000.0,
-        rope_scaling=None,
-        attention_bias=False,
-        attention_dropout=0.0,
-        mlp_bias=False,
-        sliding_window=2047,
-        bskcn_1=None,
-        bskcn_2=None,
-        bskcn_3=None,
-        bskcn_4=None,
-        bskcn_tv=None,
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-
-        # for backward compatibility
-        if num_key_value_heads is None:
-            num_key_value_heads = num_attention_heads
-
-        self.num_key_value_heads = num_key_value_heads
-        self.hidden_act = hidden_act
-        self.initializer_range = initializer_range
-        self.rms_norm_eps = rms_norm_eps
-        self.pretraining_tp = pretraining_tp
-        self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.rope_scaling = rope_scaling
-        self._rope_scaling_validation()
-        self.attention_bias = attention_bias
-        self.attention_dropout = attention_dropout
-        self.mlp_bias = mlp_bias
-        self.sliding_window = sliding_window
-        self.bskcn_1 = bskcn_1 if bskcn_1 is not None else [12, 20, 32, 44]
-        self.bskcn_2 = bskcn_2 if bskcn_2 is not None else [20, 32]
-        self.bskcn_3 = bskcn_3 if bskcn_3 is not None else [16, 24, 36, 48]
-        self.bskcn_4 = bskcn_4 if bskcn_4 is not None else [28, 40]
-        self.bskcn_tv = bskcn_tv if bskcn_tv is not None else [0.9, 0.8]
-
-        super().__init__(
-            pad_token_id=pad_token_id,
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            tie_word_embeddings=tie_word_embeddings,
-            **kwargs,
-        )
-
-    def _rope_scaling_validation(self):
-        """
-        Validate the `rope_scaling` configuration.
-        """
-        if self.rope_scaling is None:
-            return
-
-        if (not isinstance(self.rope_scaling, dict)
-                or len(self.rope_scaling) != 2):
-            raise ValueError(
-                "`rope_scaling` must be a dictionary with two fields,"
-                " `type` and `factor`, "
-                f"got {self.rope_scaling}")
-        rope_scaling_type = self.rope_scaling.get("type", None)
-        rope_scaling_factor = self.rope_scaling.get("factor", None)
-        if rope_scaling_type is None or rope_scaling_type not in [
-                "linear",
-                "dynamic",
-        ]:
-            raise ValueError(f"`rope_scaling`'s type field must be one of "
-                             f"['linear', 'dynamic'], got {rope_scaling_type}")
-        if (rope_scaling_factor is None
-                or not isinstance(rope_scaling_factor, float)
-                or rope_scaling_factor <= 1.0):
-            raise ValueError(
-                f"`rope_scaling`'s factor field must be a float > 1,"
-                f" got {rope_scaling_factor}")
diff --git a/vllm/transformers_utils/configs/telechat2.py b/vllm/transformers_utils/configs/telechat2.py
deleted file mode 100644
index 050a7851d143f..0000000000000
--- a/vllm/transformers_utils/configs/telechat2.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# adapted from https://www.modelscope.cn/models/TeleAI/TeleChat2-3B/resolve/master/configuration_telechat2.py
-""" Telechat configuration compatible with LlamaConfig. """
-
-from transformers.configuration_utils import PretrainedConfig
-
-
-class Telechat2Config(PretrainedConfig):
-
-    model_type = "telechat"
-    keys_to_ignore_at_inference = ["past_key_values"]
-    attribute_map = {
-        "num_hidden_layers": "n_layer",
-        "num_attention_heads": "n_head",
-        "intermediate_size": "ffn_hidden_size",
-        "rms_norm_eps": "layer_norm_epsilon"
-    }
-
-    def __init__(
-        self,
-        vocab_size=160256,
-        hidden_size=4096,
-        n_layer=30,
-        n_head=32,
-        layer_norm_epsilon=1e-5,
-        initializer_range=0.02,
-        use_cache=True,
-        bos_token_id=1,
-        eos_token_id=2,
-        apply_residual_connection_post_layernorm=False,
-        hidden_dropout=0.0,
-        attention_dropout=0.0,
-        ffn_hidden_size=12288,
-        training_seqlen=8192,
-        logn=True,
-        embed_layernorm=False,
-        hidden_act="silu",
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        n_embed = kwargs.pop("n_embed", None)
-        self.hidden_size = hidden_size if n_embed is None else n_embed
-        self.n_layer = n_layer
-        self.n_head = n_head
-        self.layer_norm_epsilon = layer_norm_epsilon
-        self.initializer_range = initializer_range
-        self.use_cache = use_cache
-        self.apply_residual_connection_post_layernorm = (
-            apply_residual_connection_post_layernorm)
-        self.hidden_dropout = hidden_dropout
-        self.attention_dropout = attention_dropout
-        self.bos_token_id = bos_token_id
-        self.eos_token_id = eos_token_id
-        self.logn = logn
-        self.training_seqlen = training_seqlen
-        self.embed_layernorm = embed_layernorm
-        self.num_key_value_heads = kwargs.pop("num_key_value_heads", None)
-        self.ffn_hidden_size = ffn_hidden_size
-        self.hidden_act = hidden_act
-        super().__init__(bos_token_id=bos_token_id,
-                         eos_token_id=eos_token_id,
-                         **kwargs)
diff --git a/vllm/transformers_utils/processors/__init__.py b/vllm/transformers_utils/processors/__init__.py
index 14d15f2bc1673..eca4d7c884dd3 100644
--- a/vllm/transformers_utils/processors/__init__.py
+++ b/vllm/transformers_utils/processors/__init__.py
@@ -1,5 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Multi-modal processors may be defined in this directory for the following
+reasons:
+
+- There is no processing file defined by HF Hub or Transformers library.
+- There is a need to override the existing processor to support vLLM.
+"""
 
 from vllm.transformers_utils.processors.deepseek_vl2 import (
     DeepseekVLV2Processor)

From 02f82fe4386b3e84eb0f06bfaf7744c5b4fdba4f Mon Sep 17 00:00:00 2001
From: Jee Jee Li <pandaleefree@gmail.com>
Date: Wed, 30 Jul 2025 14:58:57 +0800
Subject: [PATCH 046/224] [Doc] Update Intern-S1 info  (#21908)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
---
 docs/models/supported_models.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index e2172051cd186..5a9823bb6bae7 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -595,7 +595,7 @@ See [this page](generative_models.md) for more information on how to use generat
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ |
 | `H2OVLChatModel` | H2OVL | T + I<sup>E+</sup> | `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc. | | ✅︎ | ✅︎ |
 | `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |
-| `InternS1ForConditionalGeneration` | Intern-S1 | T + I<sup>E+</sup> + V<sup>E+</sup> | `internlm/Intern-S1`, etc. | | ✅︎ | ✅︎ |
+| `InternS1ForConditionalGeneration` | Intern-S1 | T + I<sup>E+</sup> + V<sup>E+</sup> | `internlm/Intern-S1`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `InternVLChatModel` | InternVL 3.0, InternVideo 2.5, InternVL 2.5, Mono-InternVL, InternVL 2.0 | T + I<sup>E+</sup> + (V<sup>E+</sup>) | `OpenGVLab/InternVL3-9B`, `OpenGVLab/InternVideo2_5_Chat_8B`, `OpenGVLab/InternVL2_5-4B`, `OpenGVLab/Mono-InternVL-2B`, `OpenGVLab/InternVL2-4B`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `KeyeForConditionalGeneration` | Keye-VL-8B-Preview | T + I<sup>E+</sup> + V<sup>E+</sup> | `Kwai-Keye/Keye-VL-8B-Preview` | | | ✅︎ |
 | `KimiVLForConditionalGeneration` | Kimi-VL-A3B-Instruct, Kimi-VL-A3B-Thinking | T + I<sup>+</sup> | `moonshotai/Kimi-VL-A3B-Instruct`, `moonshotai/Kimi-VL-A3B-Thinking` | | | ✅︎ |

From 30ef30ed5af77829771aec485e0f41d05d4d9880 Mon Sep 17 00:00:00 2001
From: Kebe <mail@kebe7jun.com>
Date: Wed, 30 Jul 2025 15:37:59 +0800
Subject: [PATCH 047/224] [CI] rollback lint-and-deploy pipeline using amd
 machine (#21912)

Signed-off-by: Kebe <mail@kebe7jun.com>
---
 .github/workflows/lint-and-deploy.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml
index d5736c0aee208..74a7a3a3530f5 100644
--- a/.github/workflows/lint-and-deploy.yaml
+++ b/.github/workflows/lint-and-deploy.yaml
@@ -7,7 +7,7 @@ permissions:
 
 jobs:
   lint-and-deploy:
-    runs-on: ubuntu-24.04-arm
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

From 547795232de307e0bff5e779530f01d7e6f4a9ad Mon Sep 17 00:00:00 2001
From: Varun Vinayak Shenoy <varun.vinayak.shenoy@oracle.com>
Date: Wed, 30 Jul 2025 00:44:15 -0700
Subject: [PATCH 048/224] [Tests] Fixing bug inside MultiModalProfiler.
 (#21842)

Signed-off-by: Varun Shenoy <varun.vinayak.shenoy@oracle.com>
---
 .../multimodal/processing/test_mllama4.py     | 67 +++++++++++++++++++
 tests/models/registry.py                      |  4 +-
 2 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 tests/models/multimodal/processing/test_mllama4.py

diff --git a/tests/models/multimodal/processing/test_mllama4.py b/tests/models/multimodal/processing/test_mllama4.py
new file mode 100644
index 0000000000000..f3871b60c3f64
--- /dev/null
+++ b/tests/models/multimodal/processing/test_mllama4.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for mllama's multimodal preprocessing and profiling."""
+import pytest
+from torch import prod
+from transformers import Llama4Config
+
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.profiling import MultiModalProfiler
+
+from ...utils import build_model_context
+
+
+@pytest.mark.parametrize("model_id", ["meta-llama/Llama-Guard-4-12B"])
+@pytest.mark.parametrize("max_model_len", [4096, 8192, 25600, 131072])
+def test_profiling(model_id: str, max_model_len: int):
+    model_config_kwargs = {
+        "max_model_len": max_model_len,
+    }
+    ctx = build_model_context(
+        model_id,
+        model_config_kwargs=model_config_kwargs,
+        limit_mm_per_prompt={"image": 1},
+    )
+
+    mm_config = ctx.get_mm_config()
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+    profiler = MultiModalProfiler(processor)
+
+    decoder_dummy_data = profiler.get_decoder_dummy_data(
+        max_model_len,
+        mm_counts=mm_config.limit_per_prompt,
+    )
+    dummy_mm_data = processor.dummy_inputs.get_dummy_processor_inputs(
+        max_model_len,
+        mm_counts=mm_config.limit_per_prompt,
+    )
+
+    hf_config = ctx.get_hf_config(Llama4Config)
+
+    mm_kwargs = processor.apply(
+        prompt=dummy_mm_data.prompt,
+        mm_data=dummy_mm_data.mm_data,
+        hf_processor_mm_kwargs=dict(),
+    )["mm_kwargs"]
+
+    image_size = hf_config.vision_config.image_size
+    patch_size = hf_config.vision_config.patch_size
+    downsample_ratio = int(
+        round(1.0 / (hf_config.vision_config.pixel_shuffle_ratio**2)))
+    tokens_per_patch = ((image_size // patch_size)**2) // downsample_ratio
+    chunks_per_image = prod(mm_kwargs["patches_per_image"])
+    total_num_patches = chunks_per_image * tokens_per_patch
+    num_tiles = mm_kwargs["aspect_ratios"][0][0] * mm_kwargs["aspect_ratios"][
+        0][1]  # x-y separator tokens
+    total_tokens = total_num_patches.item() + num_tiles.item(
+    ) + 3  # image start, image, image end
+
+    profiled_tokens = profiler.get_mm_max_contiguous_tokens(
+        max_model_len,
+        mm_counts=mm_config.limit_per_prompt,
+    )
+
+    assert total_tokens == profiled_tokens["image"]
+    assert total_tokens == sum(
+        placeholder.length for placeholder in
+        decoder_dummy_data.multi_modal_placeholders["image"])
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 4fcd02efb6d0b..caa691039fce3 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -391,7 +391,9 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                                                       extras={"thinking": "moonshotai/Kimi-VL-A3B-Thinking"},  # noqa: E501
                                                       trust_remote_code=True),
     "Llama4ForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct",   # noqa: E501
-                                                      max_model_len=10240),
+                                                      max_model_len=10240,
+                                                      extras={"llama-guard-4": "meta-llama/Llama-Guard-4-12B"}, # noqa: E501
+                                                      ),
     "LlavaForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-1.5-7b-hf",
                                                      extras={"mistral": "mistral-community/pixtral-12b", # noqa: E501
                                                              "mistral-fp8": "nm-testing/pixtral-12b-FP8-dynamic"}),  # noqa: E501

From fc91da549978347a3b5f5ebe6e8cbeae6148e012 Mon Sep 17 00:00:00 2001
From: Jee Jee Li <pandaleefree@gmail.com>
Date: Wed, 30 Jul 2025 15:55:03 +0800
Subject: [PATCH 049/224] [Model] Remove DSV2 unused code (#21903)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
---
 vllm/model_executor/models/deepseek_v2.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py
index 79ddd3d0f6276..68a0a83d6204c 100644
--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@@ -830,20 +830,6 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
                                        sampling_metadata)
         return logits
 
-    def make_empty_intermediate_tensors(
-            self, batch_size: int, dtype: torch.dtype,
-            device: torch.device) -> IntermediateTensors:
-        return IntermediateTensors({
-            "hidden_states":
-            torch.zeros((batch_size, self.config.hidden_size),
-                        dtype=dtype,
-                        device=device),
-            "residual":
-            torch.zeros((batch_size, self.config.hidden_size),
-                        dtype=dtype,
-                        device=device),
-        })
-
     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [

From 533db0935da051ac793e8b22afbcb9ae9fa4255b Mon Sep 17 00:00:00 2001
From: Peter Pan <peter.pan@daocloud.io>
Date: Wed, 30 Jul 2025 16:15:43 +0800
Subject: [PATCH 050/224] [benchmark] add max-concurrency in result table
 (#21095)

Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
---
 benchmarks/benchmark_serving.py                   | 4 ++++
 benchmarks/benchmark_serving_structured_output.py | 4 ++++
 vllm/benchmarks/serve.py                          | 6 ++++++
 3 files changed, 14 insertions(+)

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 53bd3247afbb6..3affa18ae3a4f 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -413,6 +413,10 @@ async def benchmark(
 
     print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
     print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
+    if max_concurrency is not None:
+        print("{:<40} {:<10}".format("Maximum request concurrency:", max_concurrency))
+    if request_rate != float("inf"):
+        print("{:<40} {:<10.2f}".format("Request rate configured (RPS):", request_rate))
     print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
     print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
     print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index d535cd5d7e1a6..2a22f122c78e6 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -555,6 +555,10 @@ async def benchmark(
 
     print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
     print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
+    if max_concurrency is not None:
+        print("{:<40} {:<10}".format("Maximum request concurrency:", max_concurrency))
+    if request_rate != float("inf"):
+        print("{:<40} {:<10.2f}".format("Request rate configured (RPS):", request_rate))
     print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
     print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
     print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))
diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py
index 635363440c081..bd2b1e5990c83 100644
--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -486,6 +486,12 @@ async def benchmark(
 
     print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
     print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
+    if max_concurrency is not None:
+        print("{:<40} {:<10}".format("Maximum request concurrency:",
+                                     max_concurrency))
+    if request_rate != float('inf'):
+        print("{:<40} {:<10.2f}".format("Request rate configured (RPS):",
+                                        request_rate ))
     print("{:<40} {:<10.2f}".format("Benchmark duration (s):",
                                     benchmark_duration))
     print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))

From 5bbaf492a6238ff517249e73151ae9989f7bea9e Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Wed, 30 Jul 2025 16:32:39 +0800
Subject: [PATCH 051/224] [Doc] Update partial support (#21916)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 docs/features/compatibility_matrix.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/features/compatibility_matrix.md b/docs/features/compatibility_matrix.md
index 930265b8f9840..5b08b3810776c 100644
--- a/docs/features/compatibility_matrix.md
+++ b/docs/features/compatibility_matrix.md
@@ -41,17 +41,18 @@ th:not(:first-child) {
 | [LoRA](lora.md) | ✅ | ✅ | ✅ | | | | | | | | | | | |
 | [SD](spec_decode.md) | ✅ | ✅ | ❌ | ✅ | | | | | | | | | | |
 | CUDA graph | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | | |
-| [pooling](../models/pooling_models.md) | ✅\* | ✅\* | ✅ | ❌ | ✅ | ✅ | | | | | | | | |
+| [pooling](../models/pooling_models.md) | 🟠\* | 🟠\* | ✅ | ❌ | ✅ | ✅ | | | | | | | | |
 | <abbr title="Encoder-Decoder Models">enc-dec</abbr> | ❌ | [❌](gh-issue:7366) | ❌ | [❌](gh-issue:7366) | ✅ | ✅ | ✅ | | | | | | | |
 | <abbr title="Logprobs">logP</abbr> | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | | | | | | |
 | <abbr title="Prompt Logprobs">prmpt logP</abbr> | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | | | | | |
 | <abbr title="Async Output Processing">async output</abbr> | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | | | | |
 | multi-step | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | | | |
-| [mm](multimodal_inputs.md) | ✅ | ✅ | [🟠](gh-pr:4194) | ❔ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | | |
+| [mm](multimodal_inputs.md) | ✅ | ✅ | [🟠](gh-pr:4194)<sup>^</sup> | ❔ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | | |
 | best-of | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ✅ | ✅ | |
 | beam-search | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ❔ | ✅ | ✅ |
 
-\* Chunked prefill and prefix caching are only applicable to last-token pooling.
+\* Chunked prefill and prefix caching are only applicable to last-token pooling.  
+<sup>^</sup> LoRA is only applicable to the language backbone of multimodal models.
 
 [](){ #feature-x-hardware }
 

From 5c8fe389d6fb2b8776d4113d8334d8dd09f78733 Mon Sep 17 00:00:00 2001
From: Hongsheng Liu <liuhongsheng4@huawei.com>
Date: Wed, 30 Jul 2025 20:11:58 +0800
Subject: [PATCH 052/224] [Docs] Fix the example code of streaming chat
 completions in reasoning (#21825)

Signed-off-by: wangzi <3220100013@zju.edu.cn>
Co-authored-by: wangzi <3220100013@zju.edu.cn>
Co-authored-by: Zi Wang <66560864+BruceW-07@users.noreply.github.com>
---
 docs/features/reasoning_outputs.md                  | 13 ++++++-------
 ...enai_chat_completion_with_reasoning_streaming.py | 13 ++++++-------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/docs/features/reasoning_outputs.md b/docs/features/reasoning_outputs.md
index 6b84eca275309..04b943efbbbb4 100644
--- a/docs/features/reasoning_outputs.md
+++ b/docs/features/reasoning_outputs.md
@@ -123,13 +123,12 @@ OpenAI Python client library does not officially support `reasoning_content` att
     printed_content = False
 
     for chunk in stream:
-        reasoning_content = None
-        content = None
-        # Check the content is reasoning_content or content
-        if hasattr(chunk.choices[0].delta, "reasoning_content"):
-            reasoning_content = chunk.choices[0].delta.reasoning_content
-        elif hasattr(chunk.choices[0].delta, "content"):
-            content = chunk.choices[0].delta.content
+        # Safely extract reasoning_content and content from delta,
+        # defaulting to None if attributes don't exist or are empty strings
+        reasoning_content = (
+            getattr(chunk.choices[0].delta, "reasoning_content", None) or None
+        )
+        content = getattr(chunk.choices[0].delta, "content", None) or None
 
         if reasoning_content is not None:
             if not printed_reasoning_content:
diff --git a/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py b/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
index 5a91929770945..7d1ea37714599 100644
--- a/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
+++ b/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
@@ -51,13 +51,12 @@ def main():
     printed_content = False
 
     for chunk in stream:
-        reasoning_content = None
-        content = None
-        # Check the content is reasoning_content or content
-        if hasattr(chunk.choices[0].delta, "reasoning_content"):
-            reasoning_content = chunk.choices[0].delta.reasoning_content
-        elif hasattr(chunk.choices[0].delta, "content"):
-            content = chunk.choices[0].delta.content
+        # Safely extract reasoning_content and content from delta,
+        # defaulting to None if attributes don't exist or are empty strings
+        reasoning_content = (
+            getattr(chunk.choices[0].delta, "reasoning_content", None) or None
+        )
+        content = getattr(chunk.choices[0].delta, "content", None) or None
 
         if reasoning_content is not None:
             if not printed_reasoning_content:

From 13986365a9e669a8aa1abb308d48dfd276a4f97b Mon Sep 17 00:00:00 2001
From: Patrick von Platen <patrick.v.platen@gmail.com>
Date: Wed, 30 Jul 2025 14:42:51 +0200
Subject: [PATCH 053/224] Add @patrickvonplaten as maintainer of mistral's
 related files. (#21928)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
---
 .github/CODEOWNERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index fb9f44353cec8..5bc944296763d 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -65,3 +65,11 @@ mkdocs.yaml @hmellor
 # Qwen-specific files
 /vllm/attention/backends/dual_chunk_flash_attn.py @sighingnow
 /vllm/model_executor/models/qwen* @sighingnow
+
+# Mistral-specific files
+/vllm/model_executor/models/mistral*.py @patrickvonplaten
+/vllm/model_executor/models/mixtral*.py @patrickvonplaten
+/vllm/model_executor/models/voxtral*.py @patrickvonplaten
+/vllm/model_executor/models/pixtral*.py @patrickvonplaten
+/vllm/transformers_utils/configs/mistral.py @patrickvonplaten
+/vllm/transformers_utils/tokenizers/mistral.py @patrickvonplaten

From b876860c6214d03279e79e0babb7eb4e3e286cbd Mon Sep 17 00:00:00 2001
From: Eric Curtin <ecurtin@redhat.com>
Date: Wed, 30 Jul 2025 14:22:00 +0100
Subject: [PATCH 054/224] [Hardware][CPU] Build fix for ARM without BF16
 (#21848)

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
---
 csrc/cpu/quant.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/csrc/cpu/quant.cpp b/csrc/cpu/quant.cpp
index c1f7c64ea2f49..6e120b8d20a7e 100644
--- a/csrc/cpu/quant.cpp
+++ b/csrc/cpu/quant.cpp
@@ -16,12 +16,14 @@ struct KernelVecType<float> {
   using cvt_vec_type = vec_op::FP32Vec16;
 };
 
+#if !defined(__aarch64__) || defined(ARM_BF16_SUPPORT)
 template <>
 struct KernelVecType<c10::BFloat16> {
   using load_vec_type = vec_op::BF16Vec16;
   using azp_adj_load_vec_type = vec_op::INT32Vec16;
   using cvt_vec_type = vec_op::FP32Vec16;
 };
+#endif
 
 template <>
 struct KernelVecType<c10::Half> {

From d979dd6bebb1857052e6beae682e5186f8447fde Mon Sep 17 00:00:00 2001
From: aladerran <108529629+aladerran@users.noreply.github.com>
Date: Wed, 30 Jul 2025 21:27:57 +0800
Subject: [PATCH 055/224] [Feature][EPLB] Add eplb support for Qwen3 (#20815)

Signed-off-by: aladerran <aladerran@gmail.com>
---
 vllm/model_executor/models/qwen3_moe.py | 166 ++++++++++++++++++++----
 1 file changed, 142 insertions(+), 24 deletions(-)

diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py
index 12899c28016b9..ca14fd06574ec 100644
--- a/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm/model_executor/models/qwen3_moe.py
@@ -22,7 +22,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Inference-only Qwen3MoE model compatible with HuggingFace weights."""
-from collections.abc import Iterable
+import typing
+from collections.abc import Callable, Iterable
 from typing import Any, Optional, Union
 
 import torch
@@ -31,8 +32,9 @@ from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
-from vllm.config import CacheConfig, VllmConfig
-from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
+from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config
+from vllm.distributed import (get_ep_group, get_pp_group,
+                              get_tensor_model_parallel_world_size)
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe import FusedMoE
@@ -50,8 +52,8 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
 
-from .interfaces import SupportsLoRA, SupportsPP
-from .utils import (AutoWeightsLoader, extract_layer_index,
+from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP
+from .utils import (AutoWeightsLoader, PPMissingLayer, extract_layer_index,
                     is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers,
                     maybe_prefix)
@@ -101,23 +103,47 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
+        enable_eplb: bool = False,
     ):
         super().__init__()
         self.tp_size = get_tensor_model_parallel_world_size()
 
+        self.ep_group = get_ep_group().device_group
+        self.ep_rank = self.ep_group.rank()
+        self.ep_size = self.ep_group.size()
+        self.n_routed_experts = config.num_experts
+
         if self.tp_size > config.num_experts:
             raise ValueError(
                 f"Tensor parallel size {self.tp_size} is greater than "
                 f"the number of experts {config.num_experts}.")
 
-        self.experts = FusedMoE(num_experts=config.num_experts,
+        # Load balancing settings.
+        vllm_config = get_current_vllm_config()
+        parallel_config = vllm_config.parallel_config
+        self.enable_eplb = enable_eplb
+
+        self.n_logical_experts = self.n_routed_experts
+        self.n_redundant_experts = parallel_config.num_redundant_experts
+        self.n_physical_experts = (self.n_logical_experts +
+                                   self.n_redundant_experts)
+        self.n_local_physical_experts = self.n_physical_experts // self.ep_size
+
+        self.physical_expert_start = (self.ep_rank *
+                                      self.n_local_physical_experts)
+        self.physical_expert_end = (self.physical_expert_start +
+                                    self.n_local_physical_experts)
+
+        self.experts = FusedMoE(num_experts=self.n_routed_experts,
                                 top_k=config.num_experts_per_tok,
                                 hidden_size=config.hidden_size,
                                 intermediate_size=config.moe_intermediate_size,
                                 reduce_results=False,
                                 renormalize=config.norm_topk_prob,
                                 quant_config=quant_config,
-                                prefix=f"{prefix}.experts")
+                                prefix=f"{prefix}.experts",
+                                enable_eplb=self.enable_eplb,
+                                num_redundant_experts=self.n_redundant_experts)
 
         self.gate = ReplicatedLinear(config.hidden_size,
                                      config.num_experts,
@@ -246,6 +272,7 @@ class Qwen3MoeDecoderLayer(nn.Module):
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
+        enable_eplb: bool = False,
     ) -> None:
         super().__init__()
         self.hidden_size = config.hidden_size
@@ -277,7 +304,8 @@ class Qwen3MoeDecoderLayer(nn.Module):
             (layer_idx + 1) % config.decoder_sparse_step == 0):
             self.mlp = Qwen3MoeSparseMoeBlock(config=config,
                                               quant_config=quant_config,
-                                              prefix=f"{prefix}.mlp")
+                                              prefix=f"{prefix}.mlp",
+                                              enable_eplb=enable_eplb)
         else:
             self.mlp = Qwen3MoeMLP(hidden_size=config.hidden_size,
                                    intermediate_size=config.intermediate_size,
@@ -323,6 +351,9 @@ class Qwen3MoeModel(nn.Module):
         config = vllm_config.model_config.hf_config
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
+        parallel_config = vllm_config.parallel_config
+        enable_eplb = parallel_config.enable_eplb
+        self.num_redundant_experts = parallel_config.num_redundant_experts
 
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
@@ -336,7 +367,8 @@ class Qwen3MoeModel(nn.Module):
             lambda prefix: Qwen3MoeDecoderLayer(config=config,
                                                 cache_config=cache_config,
                                                 quant_config=quant_config,
-                                                prefix=prefix),
+                                                prefix=prefix,
+                                                enable_eplb=enable_eplb),
             prefix=f"{prefix}.layers",
         )
         self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
@@ -382,7 +414,8 @@ class Qwen3MoeModel(nn.Module):
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
             ckpt_up_proj_name="up_proj",
-            num_experts=self.config.num_experts)
+            num_experts=self.config.num_experts,
+            num_redundant_experts=self.num_redundant_experts)
 
     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
@@ -433,27 +466,51 @@ class Qwen3MoeModel(nn.Module):
                 weight_loader(param, loaded_weight, shard_id)
                 break
             else:
+                is_expert_weight = False
                 for mapping in expert_params_mapping:
                     param_name, weight_name, expert_id, shard_id = mapping
                     if weight_name not in name:
                         continue
-                    name = name.replace(weight_name, param_name)
-                    # Skip layers on other devices.
-                    if is_pp_missing_parameter(name, self):
+
+                    # Regardless of the outcome below, this is an expert
+                    # weight and must not be loaded as a regular weight later
+                    is_expert_weight = True
+
+                    # Do not modify `name` since the loop may continue here
+                    # Instead, create a new variable
+                    name_mapped = name.replace(weight_name, param_name)
+
+                    if is_pp_missing_parameter(name_mapped, self):
                         continue
+
                     # Skip loading extra parameters for GPTQ/modelopt models.
-                    if name.endswith(
-                            ignore_suffixes) and name not in params_dict:
+                    if name_mapped.endswith(
+                            ignore_suffixes
+                    ) and name_mapped not in params_dict:
                         continue
-                    param = params_dict[name]
-                    weight_loader = param.weight_loader
-                    weight_loader(param,
-                                  loaded_weight,
-                                  name,
-                                  shard_id=shard_id,
-                                  expert_id=expert_id)
-                    break
+
+                    param = params_dict[name_mapped]
+                    # We should ask the weight loader to return success or not
+                    # here since otherwise we may skip experts with other
+                    # available replicas.
+                    weight_loader = typing.cast(Callable[..., bool],
+                                                param.weight_loader)
+                    success = weight_loader(param,
+                                            loaded_weight,
+                                            name_mapped,
+                                            shard_id=shard_id,
+                                            expert_id=expert_id,
+                                            return_success=True)
+                    if success:
+                        name = name_mapped
+                        break
                 else:
+                    if is_expert_weight:
+                        # We've checked that this is an expert weight
+                        # However it's not mapped locally to this rank
+                        # So we simply skip it
+                        continue
+
                     # Skip loading extra parameters for GPTQ/modelopt models.
                     if name.endswith(
                             ignore_suffixes) and name not in params_dict:
@@ -482,7 +539,8 @@ class Qwen3MoeModel(nn.Module):
         return loaded_params
 
 
-class Qwen3MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
+class Qwen3MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA,
+                          MixtureOfExperts):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
@@ -514,6 +572,66 @@ class Qwen3MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
         self.make_empty_intermediate_tensors = (
             self.model.make_empty_intermediate_tensors)
 
+        # Set MoE hyperparameters
+        self.expert_weights = []
+
+        self.moe_layers: list[FusedMoE] = []
+        example_layer = None
+        for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
+            assert isinstance(layer, Qwen3MoeDecoderLayer)
+            if isinstance(layer.mlp, Qwen3MoeSparseMoeBlock):
+                example_layer = layer.mlp
+                self.moe_layers.append(layer.mlp.experts)
+
+        if example_layer is None:
+            raise RuntimeError("No Qwen3MoE layer found in the model.layers.")
+
+        self.num_moe_layers = len(self.moe_layers)
+        self.num_expert_groups = 1
+        self.num_shared_experts = 0
+        self.num_logical_experts = example_layer.n_logical_experts
+        self.num_physical_experts = example_layer.n_physical_experts
+        self.num_local_physical_experts = example_layer.n_local_physical_experts
+        self.num_routed_experts = example_layer.n_routed_experts
+        self.num_redundant_experts = example_layer.n_redundant_experts
+
+    def set_eplb_state(
+        self,
+        expert_load_view: torch.Tensor,
+        logical_to_physical_map: torch.Tensor,
+        logical_replica_count: torch.Tensor,
+    ) -> None:
+        for layer_idx, layer in enumerate(self.moe_layers):
+            # Register the expert weights.
+            self.expert_weights.append(layer.get_expert_weights())
+            layer.set_eplb_state(
+                moe_layer_idx=layer_idx,
+                expert_load_view=expert_load_view,
+                logical_to_physical_map=logical_to_physical_map,
+                logical_replica_count=logical_replica_count,
+            )
+
+    def update_physical_experts_metadata(
+        self,
+        num_physical_experts: int,
+        num_local_physical_experts: int,
+    ) -> None:
+        assert self.num_local_physical_experts == num_local_physical_experts
+        self.num_physical_experts = num_physical_experts
+        self.num_local_physical_experts = num_local_physical_experts
+        self.num_redundant_experts = (num_physical_experts -
+                                      self.num_logical_experts)
+        for layer in self.model.layers:
+            if isinstance(layer.mlp, Qwen3MoeSparseMoeBlock):
+                moe = layer.mlp
+                moe.n_local_physical_experts = num_local_physical_experts
+                moe.n_physical_experts = num_physical_experts
+                moe.n_redundant_experts = self.num_redundant_experts
+                moe.experts.update_expert_map()
+
     def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
         return self.model.get_input_embeddings(input_ids)
 

From fcfd1eb9c556e295eb5708eb0f5e6ae775807775 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Wed, 30 Jul 2025 21:36:34 +0800
Subject: [PATCH 056/224] [Doc] Remove vLLM prefix and add citation for
 PagedAttention (#21910)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 .../paged_attention}/k_vecs.png               | Bin
 .../paged_attention}/key.png                  | Bin
 .../paged_attention}/logits_vec.png           | Bin
 .../paged_attention}/q_vecs.png               | Bin
 .../paged_attention}/query.png                | Bin
 .../paged_attention}/v_vec.png                | Bin
 .../paged_attention}/value.png                | Bin
 docs/design/paged_attention.md                |  29 ++++++++++++------
 docs/design/plugin_system.md                  |   2 +-
 docs/design/torch_compile.md                  |   2 +-
 10 files changed, 22 insertions(+), 11 deletions(-)
 rename docs/assets/{kernel => design/paged_attention}/k_vecs.png (100%)
 rename docs/assets/{kernel => design/paged_attention}/key.png (100%)
 rename docs/assets/{kernel => design/paged_attention}/logits_vec.png (100%)
 rename docs/assets/{kernel => design/paged_attention}/q_vecs.png (100%)
 rename docs/assets/{kernel => design/paged_attention}/query.png (100%)
 rename docs/assets/{kernel => design/paged_attention}/v_vec.png (100%)
 rename docs/assets/{kernel => design/paged_attention}/value.png (100%)

diff --git a/docs/assets/kernel/k_vecs.png b/docs/assets/design/paged_attention/k_vecs.png
similarity index 100%
rename from docs/assets/kernel/k_vecs.png
rename to docs/assets/design/paged_attention/k_vecs.png
diff --git a/docs/assets/kernel/key.png b/docs/assets/design/paged_attention/key.png
similarity index 100%
rename from docs/assets/kernel/key.png
rename to docs/assets/design/paged_attention/key.png
diff --git a/docs/assets/kernel/logits_vec.png b/docs/assets/design/paged_attention/logits_vec.png
similarity index 100%
rename from docs/assets/kernel/logits_vec.png
rename to docs/assets/design/paged_attention/logits_vec.png
diff --git a/docs/assets/kernel/q_vecs.png b/docs/assets/design/paged_attention/q_vecs.png
similarity index 100%
rename from docs/assets/kernel/q_vecs.png
rename to docs/assets/design/paged_attention/q_vecs.png
diff --git a/docs/assets/kernel/query.png b/docs/assets/design/paged_attention/query.png
similarity index 100%
rename from docs/assets/kernel/query.png
rename to docs/assets/design/paged_attention/query.png
diff --git a/docs/assets/kernel/v_vec.png b/docs/assets/design/paged_attention/v_vec.png
similarity index 100%
rename from docs/assets/kernel/v_vec.png
rename to docs/assets/design/paged_attention/v_vec.png
diff --git a/docs/assets/kernel/value.png b/docs/assets/design/paged_attention/value.png
similarity index 100%
rename from docs/assets/kernel/value.png
rename to docs/assets/design/paged_attention/value.png
diff --git a/docs/design/paged_attention.md b/docs/design/paged_attention.md
index ef525e8c60412..fb991a35caf30 100644
--- a/docs/design/paged_attention.md
+++ b/docs/design/paged_attention.md
@@ -1,7 +1,7 @@
-# vLLM Paged Attention
+# Paged Attention
 
 !!! warning
-    This document is being kept in the vLLM documentation for historical purposes.
+    This is a historical document based on the [original paper for vLLM](https://arxiv.org/abs/2309.06180).
     It no longer describes the code used in vLLM today.
 
 Currently, vLLM utilizes its own implementation of a multi-head query
@@ -140,7 +140,7 @@ const scalar_t* q_ptr = q + seq_idx * q_stride + head_idx * HEAD_SIZE;
 ```
 
 <figure markdown="span">
-  ![](../../assets/kernel/query.png){ align="center" alt="query" width="70%" }
+  ![](../assets/design/paged_attention/query.png){ align="center" alt="query" width="70%" }
 </figure>
 
 Each thread defines its own `q_ptr` which points to the assigned
@@ -149,7 +149,7 @@ and `HEAD_SIZE` is 128, the `q_ptr` points to data that contains
 total of 128 elements divided into 128 / 4 = 32 vecs.
 
 <figure markdown="span">
-  ![](../../assets/kernel/q_vecs.png){ align="center" alt="q_vecs" width="70%" }
+  ![](../assets/design/paged_attention/q_vecs.png){ align="center" alt="q_vecs" width="70%" }
 </figure>
 
 ```cpp
@@ -188,7 +188,7 @@ points to key token data based on `k_cache` at assigned block,
 assigned head and assigned token.
 
 <figure markdown="span">
-  ![](../../assets/kernel/key.png){ align="center" alt="key" width="70%" }
+  ![](../assets/design/paged_attention/key.png){ align="center" alt="key" width="70%" }
 </figure>
 
 The diagram above illustrates the memory layout for key data. It
@@ -203,7 +203,7 @@ elements for one token) that will be processed by 2 threads (one
 thread group) separately.
 
 <figure markdown="span">
-  ![](../../assets/kernel/k_vecs.png){ align="center" alt="k_vecs" width="70%" }
+  ![](../assets/design/paged_attention/k_vecs.png){ align="center" alt="k_vecs" width="70%" }
 </figure>
 
 ```cpp
@@ -362,15 +362,15 @@ later steps. Now, it should store the normalized softmax result of
 ## Value
 
 <figure markdown="span">
-  ![](../../assets/kernel/value.png){ align="center" alt="value" width="70%" }
+  ![](../assets/design/paged_attention/value.png){ align="center" alt="value" width="70%" }
 </figure>
 
 <figure markdown="span">
-  ![](../../assets/kernel/logits_vec.png){ align="center" alt="logits_vec" width="50%" }
+  ![](../assets/design/paged_attention/logits_vec.png){ align="center" alt="logits_vec" width="50%" }
 </figure>
 
 <figure markdown="span">
-  ![](../../assets/kernel/v_vec.png){ align="center" alt="v_vec" width="70%" }
+  ![](../assets/design/paged_attention/v_vec.png){ align="center" alt="v_vec" width="70%" }
 </figure>
 
 Now we need to retrieve the value data and perform dot multiplication
@@ -499,3 +499,14 @@ for (int i = 0; i < NUM_ROWS_PER_THREAD; i++) {
 Finally, we need to iterate over different assigned head positions
 and write out the corresponding accumulated result based on the
 `out_ptr`.
+
+## Citation
+
+```bibtex
+@inproceedings{kwon2023efficient,
+  title={Efficient Memory Management for Large Language Model Serving with PagedAttention},
+  author={Woosuk Kwon and Zhuohan Li and Siyuan Zhuang and Ying Sheng and Lianmin Zheng and Cody Hao Yu and Joseph E. Gonzalez and Hao Zhang and Ion Stoica},
+  booktitle={Proceedings of the ACM SIGOPS 29th Symposium on Operating Systems Principles},
+  year={2023}
+}
+```
diff --git a/docs/design/plugin_system.md b/docs/design/plugin_system.md
index 23a05ac719ce2..ca1c2c2305d91 100644
--- a/docs/design/plugin_system.md
+++ b/docs/design/plugin_system.md
@@ -1,4 +1,4 @@
-# vLLM's Plugin System
+# Plugin System
 
 The community frequently requests the ability to extend vLLM with custom features. To facilitate this, vLLM includes a plugin system that allows users to add custom features without modifying the vLLM codebase. This document explains how plugins work in vLLM and how to create a plugin for vLLM.
 
diff --git a/docs/design/torch_compile.md b/docs/design/torch_compile.md
index 2d76e7f3adc5c..47ac4958dbf7f 100644
--- a/docs/design/torch_compile.md
+++ b/docs/design/torch_compile.md
@@ -1,4 +1,4 @@
-# vLLM's `torch.compile` integration
+# `torch.compile` integration
 
 In vLLM's V1 architecture, `torch.compile` is enabled by default and is a critical part of the framework. This document gives a simple walk-through example to show how to understand the `torch.compile` usage.
 

From da3e0bd6e53f12bb18d518940e8150ba023956aa Mon Sep 17 00:00:00 2001
From: "rongfu.leng" <rongfu.leng@daocloud.io>
Date: Wed, 30 Jul 2025 21:51:58 +0800
Subject: [PATCH 057/224] [Bugfix] we should use metavar is not choices
 (#21902)

Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
---
 vllm/entrypoints/openai/cli_args.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index 2d19e16883aa2..282493e543552 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -194,7 +194,9 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
 
         # Special case: Tool call parser shows built-in options.
         valid_tool_parsers = list(ToolParserManager.tool_parsers.keys())
-        frontend_kwargs["tool_call_parser"]["choices"] = valid_tool_parsers
+        parsers_str = ",".join(valid_tool_parsers)
+        frontend_kwargs["tool_call_parser"]["metavar"] = (
+            f"{{{parsers_str}}} or name registered in --tool-parser-plugin")
 
         frontend_group = parser.add_argument_group(
             title="Frontend",

From bf668b5bf56644db8e90cd0d385b62cc15a4657a Mon Sep 17 00:00:00 2001
From: Yan Pashkovsky <Yanpas@users.noreply.github.com>
Date: Wed, 30 Jul 2025 15:03:23 +0100
Subject: [PATCH 058/224] [Feature] Support multiple api keys in server
 (#18548)

Signed-off-by: Yan Pashkovsky <yanp.bugz@gmail.com>
---
 docs/getting_started/quickstart.md    |  1 +
 vllm/entrypoints/openai/api_server.py | 12 +++----
 vllm/entrypoints/openai/cli_args.py   | 46 +++++++++++++--------------
 3 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index 74235db16a15d..3a93497fab137 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
 ```
 
 You can pass in the argument `--api-key` or environment variable `VLLM_API_KEY` to enable the server to check for API key in the header.
+You can pass multiple keys after `--api-key`, and the server will accept any of the keys passed; this can be useful for key rotation.
 
 ### OpenAI Completions API with vLLM
 
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index c375c8755108c..05d9a69a65f83 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
         2. The request path doesn't start with /v1 (e.g. /health).
     """
 
-    def __init__(self, app: ASGIApp, api_token: str) -> None:
+    def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
         self.app = app
-        self.api_token = api_token
+        self.api_tokens = {f"Bearer {token}" for token in tokens}
 
     def __call__(self, scope: Scope, receive: Receive,
                  send: Send) -> Awaitable[None]:
@@ -1255,7 +1255,7 @@ class AuthenticationMiddleware:
         headers = Headers(scope=scope)
         # Type narrow to satisfy mypy.
         if url_path.startswith("/v1") and headers.get(
-                "Authorization") != f"Bearer {self.api_token}":
+                "Authorization") not in self.api_tokens:
             response = JSONResponse(content={"error": "Unauthorized"},
                                     status_code=401)
             return response(scope, receive, send)
@@ -1303,7 +1303,7 @@ class ScalingMiddleware:
     """
     Middleware that checks if the model is currently scaling and
     returns a 503 Service Unavailable response if it is.
-    
+
     This middleware applies to all HTTP requests and prevents
     processing when the model is in a scaling state.
     """
@@ -1512,8 +1512,8 @@ def build_app(args: Namespace) -> FastAPI:
                             status_code=HTTPStatus.BAD_REQUEST)
 
     # Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
-    if token := args.api_key or envs.VLLM_API_KEY:
-        app.add_middleware(AuthenticationMiddleware, api_token=token)
+    if tokens := [key for key in (args.api_key or [envs.VLLM_API_KEY]) if key]:
+        app.add_middleware(AuthenticationMiddleware, tokens=tokens)
 
     if args.enable_request_id_headers:
         app.add_middleware(XRequestIdMiddleware)
diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index 282493e543552..dfbc9cde3d5b1 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -85,22 +85,22 @@ class FrontendArgs:
     """Allowed methods."""
     allowed_headers: list[str] = field(default_factory=lambda: ["*"])
     """Allowed headers."""
-    api_key: Optional[str] = None
-    """If provided, the server will require this key to be presented in the
-    header."""
+    api_key: Optional[list[str]] = None
+    """If provided, the server will require one of these keys to be presented in
+    the header."""
     lora_modules: Optional[list[LoRAModulePath]] = None
     """LoRA modules configurations in either 'name=path' format or JSON format
-    or JSON list format. Example (old format): `'name=path'` Example (new 
-    format): `{\"name\": \"name\", \"path\": \"lora_path\", 
+    or JSON list format. Example (old format): `'name=path'` Example (new
+    format): `{\"name\": \"name\", \"path\": \"lora_path\",
     \"base_model_name\": \"id\"}`"""
     chat_template: Optional[str] = None
-    """The file path to the chat template, or the template in single-line form 
+    """The file path to the chat template, or the template in single-line form
     for the specified model."""
     chat_template_content_format: ChatTemplateContentFormatOption = "auto"
     """The format to render message content within a chat template.
 
 * "string" will render the content as a string. Example: `"Hello World"`
-* "openai" will render the content as a list of dictionaries, similar to OpenAI 
+* "openai" will render the content as a list of dictionaries, similar to OpenAI
 schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
     response_role: str = "assistant"
     """The role name to return if `request.add_generation_prompt=true`."""
@@ -117,40 +117,40 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
     root_path: Optional[str] = None
     """FastAPI root_path when app is behind a path based routing proxy."""
     middleware: list[str] = field(default_factory=lambda: [])
-    """Additional ASGI middleware to apply to the app. We accept multiple 
-    --middleware arguments. The value should be an import path. If a function 
-    is provided, vLLM will add it to the server using 
-    `@app.middleware('http')`. If a class is provided, vLLM will 
+    """Additional ASGI middleware to apply to the app. We accept multiple
+    --middleware arguments. The value should be an import path. If a function
+    is provided, vLLM will add it to the server using
+    `@app.middleware('http')`. If a class is provided, vLLM will
     add it to the server using `app.add_middleware()`."""
     return_tokens_as_token_ids: bool = False
-    """When `--max-logprobs` is specified, represents single tokens as 
-    strings of the form 'token_id:{token_id}' so that tokens that are not 
+    """When `--max-logprobs` is specified, represents single tokens as
+    strings of the form 'token_id:{token_id}' so that tokens that are not
     JSON-encodable can be identified."""
     disable_frontend_multiprocessing: bool = False
-    """If specified, will run the OpenAI frontend server in the same process as 
+    """If specified, will run the OpenAI frontend server in the same process as
     the model serving engine."""
     enable_request_id_headers: bool = False
-    """If specified, API server will add X-Request-Id header to responses. 
+    """If specified, API server will add X-Request-Id header to responses.
     Caution: this hurts performance at high QPS."""
     enable_auto_tool_choice: bool = False
-    """If specified, exclude tool definitions in prompts when 
+    """If specified, exclude tool definitions in prompts when
     tool_choice='none'."""
     exclude_tools_when_tool_choice_none: bool = False
-    """Enable auto tool choice for supported models. Use `--tool-call-parser` 
+    """Enable auto tool choice for supported models. Use `--tool-call-parser`
     to specify which parser to use."""
     tool_call_parser: Optional[str] = None
-    """Select the tool call parser depending on the model that you're using. 
-    This is used to parse the model-generated tool call into OpenAI API format. 
-    Required for `--enable-auto-tool-choice`. You can choose any option from 
+    """Select the tool call parser depending on the model that you're using.
+    This is used to parse the model-generated tool call into OpenAI API format.
+    Required for `--enable-auto-tool-choice`. You can choose any option from
     the built-in parsers or register a plugin via `--tool-parser-plugin`."""
     tool_parser_plugin: str = ""
-    """Special the tool parser plugin write to parse the model-generated tool 
-    into OpenAI API format, the name register in this plugin can be used in 
+    """Specify the tool parser plugin written to parse the model-generated tool
+    into OpenAI API format; the name registered in this plugin can be used in
     `--tool-call-parser`."""
     log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
     """Path to logging config JSON file for both vllm and uvicorn"""
     max_log_len: Optional[int] = None
-    """Max number of prompt characters or prompt ID numbers being printed in 
+    """Max number of prompt characters or prompt ID numbers being printed in
     log. The default of None means unlimited."""
     disable_fastapi_docs: bool = False
     """Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""

From e91d3c9cda69b9770241c79fbf94f81f5576e7f4 Mon Sep 17 00:00:00 2001
From: youkaichao <youkaichao@gmail.com>
Date: Wed, 30 Jul 2025 22:05:04 +0800
Subject: [PATCH 059/224] [misc] skip p2p check by default (#21904)

---
 vllm/envs.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index 50cb3b7d1b7aa..ec4b0888d0f40 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -668,12 +668,14 @@ environment_variables: dict[str, Callable[[], Any]] = {
     (os.environ.get("VLLM_ALLOW_RUNTIME_LORA_UPDATING", "0").strip().lower() in
      ("1", "true")),
 
-    # By default, vLLM will check the peer-to-peer capability itself,
-    # in case of broken drivers. See https://github.com/vllm-project/vllm/blob/a9b15c606fea67a072416ea0ea115261a2756058/vllm/distributed/device_communicators/custom_all_reduce_utils.py#L101-L108 for details. # noqa
-    # If this env var is set to 1, vLLM will skip the peer-to-peer check,
-    # and trust the driver's peer-to-peer capability report.
+    # We assume drivers can report p2p status correctly.
+    # If the program hangs when using custom allreduce,
+    # potentially caused by a bug in the driver (535 series),
+    # it might be helpful to set VLLM_SKIP_P2P_CHECK=0
+    # so that vLLM can verify if p2p is actually working.
+    # See https://github.com/vllm-project/vllm/blob/a9b15c606fea67a072416ea0ea115261a2756058/vllm/distributed/device_communicators/custom_all_reduce_utils.py#L101-L108 for details. # noqa
     "VLLM_SKIP_P2P_CHECK":
-    lambda: os.getenv("VLLM_SKIP_P2P_CHECK", "0") == "1",
+    lambda: os.getenv("VLLM_SKIP_P2P_CHECK", "1") == "1",
 
     # List of quantization kernels that should be disabled, used for testing
     # and performance comparisons. Currently only affects MPLinearKernel

From 0271c2ff2fd15bd1a7c19484572a81e056e75620 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Wed, 30 Jul 2025 10:15:02 -0400
Subject: [PATCH 060/224] [Test] Add Benchmark and Unit Test for
 `per_token_group_quant` (#21860)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 .../benchmark_per_token_group_quant.py        | 159 ++++++++++++++++++
 .../test_per_token_group_quant.py             |  31 +++-
 2 files changed, 189 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/kernels/benchmark_per_token_group_quant.py

diff --git a/benchmarks/kernels/benchmark_per_token_group_quant.py b/benchmarks/kernels/benchmark_per_token_group_quant.py
new file mode 100644
index 0000000000000..1ccb5e08b3d57
--- /dev/null
+++ b/benchmarks/kernels/benchmark_per_token_group_quant.py
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import argparse
+import math
+from contextlib import contextmanager
+from typing import Callable
+from unittest.mock import patch
+
+import torch
+
+from vllm.model_executor.layers.quantization.utils import fp8_utils, int8_utils
+from vllm.platforms import current_platform
+
+
+@contextmanager
+def _triton_mode():
+    """Temporarily force the Triton fallback path"""
+    with patch("vllm.platforms.current_platform.is_cuda", return_value=False):
+        yield
+
+
+def _time_cuda(
+    fn: Callable[[], tuple[torch.Tensor, torch.Tensor]],
+    warmup_iters: int,
+    bench_iters: int,
+) -> float:
+    # warmup
+    for _ in range(warmup_iters):
+        fn()
+    torch.cuda.synchronize()
+
+    start = torch.cuda.Event(enable_timing=True)
+    end = torch.cuda.Event(enable_timing=True)
+
+    start.record()
+    for _ in range(bench_iters):
+        fn()
+    end.record()
+    torch.cuda.synchronize()
+
+    return start.elapsed_time(end) / bench_iters  # ms/iter
+
+
+def _run_single(
+    shape: tuple[int, int],
+    group_size: int,
+    dtype: str,
+    *,
+    column_major: bool = False,
+    scale_ue8m0: bool = False,
+    warmup_iters: int,
+    bench_iters: int,
+) -> None:
+    num_tokens, hidden_dim = shape
+
+    device = torch.device("cuda")
+    torch.manual_seed(42)
+    x = torch.randn(num_tokens, hidden_dim, device=device, dtype=torch.bfloat16) * 8
+
+    if dtype == "fp8":
+
+        def cuda_impl():
+            return fp8_utils.per_token_group_quant_fp8(
+                x,
+                group_size,
+                column_major_scales=column_major,
+                use_ue8m0=scale_ue8m0,
+            )
+
+        def triton_impl():
+            with _triton_mode():
+                return fp8_utils.per_token_group_quant_fp8(
+                    x,
+                    group_size,
+                    column_major_scales=column_major,
+                    use_ue8m0=scale_ue8m0,
+                )
+    elif dtype == "int8":
+
+        def cuda_impl():
+            return int8_utils.per_token_group_quant_int8(x, group_size)
+
+        def triton_impl():
+            with _triton_mode():
+                return int8_utils.per_token_group_quant_int8(x, group_size)
+    else:
+        raise ValueError("dtype must be 'fp8' or 'int8'")
+
+    cuda_ms = _time_cuda(cuda_impl, warmup_iters, bench_iters)
+    triton_ms = _time_cuda(triton_impl, warmup_iters, bench_iters)
+
+    speedup = triton_ms / cuda_ms if cuda_ms else math.inf
+
+    cfg_desc = (
+        f"shape={shape}  gs={group_size:<3}  col_major={column_major:<5}  "
+        f"ue8m0={scale_ue8m0:<5}  dtype={dtype}"
+    )
+    print(
+        f"{cfg_desc:55} | CUDA {cuda_ms:7.3f} ms  | Triton {triton_ms:7.3f} ms  | "
+        f"speed-up ×{speedup:5.2f}"
+    )
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--warmup-iters", type=int, default=10)
+    parser.add_argument("--bench-iters", type=int, default=100)
+    parser.add_argument("--dtype", choices=["fp8", "int8", "both"], default="both")
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    if not current_platform.is_cuda():
+        raise RuntimeError("CUDA device is required to run this benchmark.")
+
+    args = parse_args()
+    warmup_iters, bench_iters = args.warmup_iters, args.bench_iters
+
+    shapes = [(32, 128), (64, 256), (16, 512)]
+    group_sizes = [64, 128]
+
+    dtypes = ["fp8", "int8"] if args.dtype == "both" else [args.dtype]
+
+    header = (
+        "Configuration".ljust(55)
+        + " | "
+        + "CUDA (ms)".center(12)
+        + " | "
+        + "Triton (ms)".center(13)
+        + " | "
+        + "Speed-up"
+    )
+    print(header)
+    print("-" * len(header))
+
+    for dtype in dtypes:
+        for shape in shapes:
+            for gs in group_sizes:
+                if dtype == "fp8":
+                    for col_major in (False, True):
+                        for ue8m0 in (False, True):
+                            _run_single(
+                                shape,
+                                gs,
+                                dtype,
+                                column_major=col_major,
+                                scale_ue8m0=ue8m0,
+                                warmup_iters=warmup_iters,
+                                bench_iters=bench_iters,
+                            )
+                else:  # INT8 has no col-major / ue8m0 switches
+                    _run_single(
+                        shape,
+                        gs,
+                        dtype,
+                        warmup_iters=warmup_iters,
+                        bench_iters=bench_iters,
+                    )
diff --git a/tests/kernels/quantization/test_per_token_group_quant.py b/tests/kernels/quantization/test_per_token_group_quant.py
index f826983fe94e1..07f17d1efe641 100644
--- a/tests/kernels/quantization/test_per_token_group_quant.py
+++ b/tests/kernels/quantization/test_per_token_group_quant.py
@@ -5,7 +5,7 @@ from unittest.mock import patch
 import pytest
 import torch
 
-from vllm.model_executor.layers.quantization.utils import fp8_utils
+from vllm.model_executor.layers.quantization.utils import fp8_utils, int8_utils
 
 
 @pytest.mark.parametrize("shape", [(32, 128), (64, 256), (16, 512)])
@@ -42,3 +42,32 @@ def test_per_token_group_quant_fp8(shape, column_major: bool,
 
     assert torch.allclose(out_q.float(), ref_q.float(), atol=0.15, rtol=0.15)
     assert torch.allclose(scale, ref_s, atol=0.01, rtol=0.01)
+
+
+@pytest.mark.parametrize("shape", [(32, 128), (64, 256), (16, 512)])
+@pytest.mark.parametrize("group_size", [64, 128])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+def test_per_token_group_quant_int8(shape, group_size: int):
+    device = "cuda"
+
+    torch.manual_seed(42)
+    num_tokens, hidden_dim = shape
+
+    x = (torch.randn(
+        (num_tokens, hidden_dim), device=device, dtype=torch.bfloat16) * 8)
+
+    # cuda path
+    out_q, scale = int8_utils.per_token_group_quant_int8(
+        x,
+        group_size,
+    )
+
+    # triton ref
+    with patch("vllm.platforms.current_platform.is_cuda", return_value=False):
+        ref_q, ref_s = int8_utils.per_token_group_quant_int8(
+            x,
+            group_size,
+        )
+
+    assert torch.allclose(out_q.float(), ref_q.float(), atol=0.15, rtol=0.15)
+    assert torch.allclose(scale, ref_s, atol=0.01, rtol=0.01)

From 0e40b2607317515bd4e847490ebd77e88f92dc1d Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Wed, 30 Jul 2025 22:17:14 +0800
Subject: [PATCH 061/224] [CI/Build] Only run markdownlint in CI (#21892)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .github/workflows/matchers/markdownlint.json | 17 +++++++++++++++++
 .github/workflows/pre-commit.yml             |  1 +
 .pre-commit-config.yaml                      |  3 ++-
 3 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/matchers/markdownlint.json

diff --git a/.github/workflows/matchers/markdownlint.json b/.github/workflows/matchers/markdownlint.json
new file mode 100644
index 0000000000000..fe094a9badb25
--- /dev/null
+++ b/.github/workflows/matchers/markdownlint.json
@@ -0,0 +1,17 @@
+{
+  "problemMatcher": [
+    {
+      "owner": "markdownlint",
+      "pattern": [
+        {
+          "regexp": "^([^:]*):(\\d+):?(\\d+)?\\s([\\w-\\/]*)\\s(.*)$",
+          "file": 1,
+          "line": 2,
+          "column": 3,
+          "code": 4,
+          "message": 5
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 8e694d18134ef..835e91d91ae94 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -17,6 +17,7 @@ jobs:
       with:
         python-version: "3.12"
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
+    - run: echo "::add-matcher::.github/workflows/matchers/markdownlint.json"
     - run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
     - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
       with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 045096cb86369..612b290e88d46 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,8 +38,9 @@ repos:
 - repo: https://github.com/igorshubovych/markdownlint-cli
   rev: v0.45.0
   hooks:
-  - id: markdownlint-fix
+  - id: markdownlint
     exclude: '.*\.inc\.md'
+    stages: [manual] # Only run in CI
 - repo: https://github.com/rhysd/actionlint
   rev: v1.7.7
   hooks:

From 36ede4598949092be3b61418a5141cbe730d1098 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Wed, 30 Jul 2025 15:18:02 +0100
Subject: [PATCH 062/224] Reduce time wasted in GitHub Actions using
 `concurrency` (#21919)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .github/workflows/lint-and-deploy.yaml | 4 ++++
 .github/workflows/pre-commit.yml       | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml
index 74a7a3a3530f5..2b1086b7faf43 100644
--- a/.github/workflows/lint-and-deploy.yaml
+++ b/.github/workflows/lint-and-deploy.yaml
@@ -2,6 +2,10 @@ name: Lint and Deploy Charts
 
 on: pull_request
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 permissions:
   contents: read
 
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 835e91d91ae94..195579f206a2f 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -5,6 +5,10 @@ on:
   push:
     branches: [main]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 permissions:
   contents: read
 

From 8f4a1c9a04b36cb7527e67f1fea96c4f05ed0e03 Mon Sep 17 00:00:00 2001
From: Ruixiang Tan <819464715@qq.com>
Date: Wed, 30 Jul 2025 22:20:43 +0800
Subject: [PATCH 063/224] [Misc] Improve code readability of KVCacheManager
 (#21673)

Signed-off-by: tanruixiang <tanruixiang0104@gmail.com>
Signed-off-by: Ruixiang Tan <819464715@qq.com>
Signed-off-by: GitHub <noreply@github.com>
---
 tests/v1/core/test_kv_cache_utils.py         |  4 ++--
 vllm/v1/core/block_pool.py                   |  2 +-
 vllm/v1/core/kv_cache_coordinator.py         |  9 ++++++---
 vllm/v1/core/kv_cache_manager.py             |  5 +----
 vllm/v1/core/kv_cache_utils.py               |  8 --------
 vllm/v1/core/single_type_kv_cache_manager.py | 12 ++++++++----
 6 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py
index e9c6f1f95cd71..bff3724d95e68 100644
--- a/tests/v1/core/test_kv_cache_utils.py
+++ b/tests/v1/core/test_kv_cache_utils.py
@@ -112,9 +112,9 @@ def test_kv_cache_block():
     assert block.block_hash is None
 
     # Test reference count manipulation
-    block.incr_ref()
+    block.ref_cnt += 1
     assert block.ref_cnt == 1
-    block.decr_ref()
+    block.ref_cnt -= 1
     assert block.ref_cnt == 0
 
     # Test block hash setting and resetting
diff --git a/vllm/v1/core/block_pool.py b/vllm/v1/core/block_pool.py
index 5bf4d3a2acb45..ad9854dd29c38 100644
--- a/vllm/v1/core/block_pool.py
+++ b/vllm/v1/core/block_pool.py
@@ -276,7 +276,7 @@ class BlockPool:
                 # candidate), so remove it.
                 if block.ref_cnt == 0 and not block.is_null:
                     self.free_block_queue.remove(block)
-                block.incr_ref()
+                block.ref_cnt += 1
 
     def free_blocks(self, ordered_blocks: Iterable[KVCacheBlock]) -> None:
         """Free a list of blocks. The blocks should be ordered by their
diff --git a/vllm/v1/core/kv_cache_coordinator.py b/vllm/v1/core/kv_cache_coordinator.py
index 258805843e227..f3a16d64e19fd 100644
--- a/vllm/v1/core/kv_cache_coordinator.py
+++ b/vllm/v1/core/kv_cache_coordinator.py
@@ -126,14 +126,17 @@ class KVCacheCoordinator(ABC):
     def get_num_common_prefix_blocks(self, request_id: str,
                                      num_running_requests: int) -> list[int]:
         """
-        Get the number of common prefix blocks for a request.
+        Get the number of common prefix blocks for all requests in the RUNNING
+        state for each kv cache group.
 
         Args:
             request_id: The request ID.
-            num_running_requests: The number of requests in the RUNNING state.
+            num_running_requests: The total number of requests in the RUNNING
+                state.
 
         Returns:
-            list[int]: The number of common prefix blocks.
+            list[int]: The number of common prefix blocks for all requests in
+                the RUNNING state for each kv cache group.
         """
         num_blocks_per_group = [
             manager.get_num_common_prefix_blocks(request_id,
diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
index e820a0ad6d5d0..ce333dbe61a19 100644
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@@ -170,10 +170,6 @@ class KVCacheManager:
                                                self.block_size, request)
             self.req_to_block_hashes[request.request_id] = block_hashes
 
-        if self.log_stats:
-            assert self.prefix_cache_stats is not None
-            self.prefix_cache_stats.requests += 1
-
         # NOTE: When all tokens hit the cache, we must recompute the last token
         # to obtain logits. Thus, set max_cache_hit_length to prompt_length - 1.
         # This can trigger recomputation of an entire block, rather than just
@@ -187,6 +183,7 @@ class KVCacheManager:
 
         if self.log_stats:
             assert self.prefix_cache_stats is not None
+            self.prefix_cache_stats.requests += 1
             self.prefix_cache_stats.queries += request.num_tokens
             self.prefix_cache_stats.hits += num_new_computed_tokens
 
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 3a72ac271afa6..25520eb655111 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -154,14 +154,6 @@ class KVCacheBlock:
     # Whether the block is a null block that should never be cached.
     is_null: bool = False
 
-    # TODO(Jialin): For performance, let callers handle ref_cnt bumps to
-    # avoid function calls.
-    def incr_ref(self):
-        self.ref_cnt += 1
-
-    def decr_ref(self):
-        self.ref_cnt -= 1
-
     @property
     def block_hash(self) -> Optional[BlockHashWithGroupId]:
         return self._block_hash
diff --git a/vllm/v1/core/single_type_kv_cache_manager.py b/vllm/v1/core/single_type_kv_cache_manager.py
index 714f49494c9a1..8f310023a8cd3 100644
--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import itertools
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from typing import Callable
@@ -177,14 +178,17 @@ class SingleTypeKVCacheManager(ABC):
     def get_num_common_prefix_blocks(self, request_id: str,
                                      num_running_requests: int) -> int:
         """
-        Get the number of common prefix blocks for a request.
+        Get the number of common prefix blocks for all requests in the RUNNING
+        state.
 
         Args:
             request_id: The request ID.
-            num_running_requests: The number of requests in the RUNNING state.
+            num_running_requests: The total number of requests in the RUNNING
+                state.
 
         Returns:
-            The number of common prefix blocks.
+            The number of common prefix blocks for all requests in the RUNNING
+                state.
         """
 
         raise NotImplementedError
@@ -264,7 +268,7 @@ class FullAttentionManager(SingleTypeKVCacheManager):
         computed_blocks: tuple[list[KVCacheBlock], ...] = tuple(
             [] for _ in range(len(kv_cache_group_ids)))
         max_num_blocks = max_length // kv_cache_spec.block_size
-        for i, block_hash in zip(range(max_num_blocks), block_hashes):
+        for block_hash in itertools.islice(block_hashes, max_num_blocks):
             # block_hashes is a chain of block hashes. If a block hash is not
             # in the cached_block_hash_to_id, the following block hashes are
             # not computed yet for sure.

From ff08e51940a77d2dd14a6c512bec4613d060b4fa Mon Sep 17 00:00:00 2001
From: "Po-Han Huang (NVIDIA)" <53919306+nvpohanh@users.noreply.github.com>
Date: Wed, 30 Jul 2025 22:33:40 +0800
Subject: [PATCH 064/224] [NVIDIA] Fix Llama4 Scout FP4 functionality issues
 (#21499)

Signed-off-by: Po-Han Huang <pohanh@nvidia.com>
---
 vllm/model_executor/layers/fused_moe/layer.py |  15 +-
 .../layers/quantization/modelopt.py           |   2 -
 vllm/model_executor/models/llama4.py          | 270 +++++++++++++-----
 3 files changed, 218 insertions(+), 69 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 254cd2e10b8fb..e16fc13c945cf 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -874,6 +874,14 @@ class FusedMoE(torch.nn.Module):
         elif shard_id == "w2":
             param_data[expert_id] = loaded_weight
 
+    def _load_w13_weight_scale(self, shard_dim: int,
+                               loaded_weight: torch.Tensor,
+                               param: torch.Tensor, tp_rank: int):
+        shard_size = param.shape[shard_dim]
+        loaded_weight = loaded_weight.narrow(shard_dim, shard_size * tp_rank,
+                                             shard_size)
+        param.copy_(loaded_weight)
+
     def _load_model_weight_or_group_weight_scale(self,
                                                  shard_dim: int,
                                                  expert_data: torch.Tensor,
@@ -1123,7 +1131,12 @@ class FusedMoE(torch.nn.Module):
                 "weight_scale_2" in weight_name if uses_weight_scale_2 else
                 "weight_scale" in weight_name) or "input_scale" in weight_name
 
-            if per_tensor_conditions:
+            if "w13_weight_scale" in weight_name:
+                self._load_w13_weight_scale(shard_dim=shard_dim,
+                                            loaded_weight=loaded_weight,
+                                            param=param,
+                                            tp_rank=self.tp_rank)
+            elif per_tensor_conditions:
                 self._load_per_tensor_weight_scale(
                     shard_id=shard_id,
                     param=param,
diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py
index 38866586ae29e..8fbc3231d86c3 100644
--- a/vllm/model_executor/layers/quantization/modelopt.py
+++ b/vllm/model_executor/layers/quantization/modelopt.py
@@ -778,8 +778,6 @@ class ModelOptNvFp4LinearMethod(LinearMethodBase):
         # Swizzle the weight blockscale.
         # contracting dimension is input dimension
         # block_size = 16;
-        assert (layer.weight_scale.shape[1] % 16 == 0), (
-            "Expected weight_scale.dim(1) to be divisible by 16")
         assert (layer.weight_scale.dtype == torch.float8_e4m3fn), (
             "Weight Block scale must be represented as FP8-E4M3")
         swizzled_weight_scale = swizzle_blockscale(layer.weight_scale)
diff --git a/vllm/model_executor/models/llama4.py b/vllm/model_executor/models/llama4.py
index fab1c163ac288..470e701d98013 100644
--- a/vllm/model_executor/models/llama4.py
+++ b/vllm/model_executor/models/llama4.py
@@ -342,34 +342,94 @@ class Llama4Model(LlamaModel):
         expert_params_mapping: list[tuple[str, str, int, str]],
         fused: bool = True,
     ) -> bool:
+        """
+        Load MoE expert weights.
+
+        Args:
+            name: The name of the weight to load.
+            loaded_weight: The weight to load.
+            params_dict: The dictionary of module parameters.
+            loaded_params: The set of already loaded parameters.
+            expert_params_mapping: The mapping of expert parameters. Must be
+                generated by FusedMoE.make_expert_params_mapping().
+            fused: Whether the expert weights are fused into a single weight
+                tensor or are separate weight tensors for each expert.
+                When fused is True, loaded_weight should have shape of:
+                [num_experts, hidden_in, hidden_out] for gate/up/down proj and
+                [hidden_out, hidden_in] for the others like router.
+                When fused is False, loaded_weight should have shape of:
+                [hidden_out, hidden_in].
+
+        Returns:
+            True if loaded_weight is one of MoE weights and the MoE expert
+            weights are loaded successfully, False otherwise.
+        """
+
+        # Whether the MoE expert weights are loaded successfully.
         expert_param_loaded = False
-        if "experts.gate_up_proj" in name:
-            loaded_weight = loaded_weight.chunk(2, dim=-1)
+
+        # If fused is True, the loaded weight is in the layout of:
+        # [num_experts, hidden_in, hidden_out], so we must transpose the last
+        # two dimensions to match the expected layout of the parameters.
+        if fused and loaded_weight.ndim == 3:
+            loaded_weight = loaded_weight.transpose(-1, -2)
+
+            # If the gate_proj and up_proj weights are fused into a single
+            # weight tensor, we need to split the weight tensor into a tuple
+            # of two weight tensors along the hidden_out dimension.
+            if "experts.gate_up_proj" in name:
+                loaded_weight = loaded_weight.chunk(2, dim=-2)
+
+        # Iterate over all the expert parameters and load the weights if we find
+        # a match in weight name.
         for (param_name, weight_name, expert_id,
              shard_id) in expert_params_mapping:
+
+            # Start each iteration from the original loaded_weight under a
+            # separate name, so rebinding below never leaks across iterations.
             new_loaded_weight = loaded_weight
+
+            # If expert weights are fused into a single weight tensor, remove
+            # the expert index from the expected weight name.
             if fused:
+                # The string between e_str and proj_str is the expert index.
                 e_str, _, proj_str, _ = weight_name.split('.')
                 weight_name = f"{e_str}.{proj_str}"
                 param_name = f"{param_name}weight"
+
+            # Skip if the current weight is not one of the MoE weights.
             if weight_name not in name:
                 continue
+
+            # Replace the weight name with the parameter name.
             full_param_name = name.replace(weight_name, param_name)
-            # Skip layers on other devices.
+
+            # Skip if the current weight corresponds to a parameter that
+            # does not exist on the current PP (pipeline parallel) rank.
             if is_pp_missing_parameter(name, self):
                 continue
+
+            # Skip bias weights that have no matching entry in params_dict.
             if ((name.endswith(".bias") or name.endswith("_bias"))
                     and name not in params_dict):
                 continue
+
             param = params_dict[full_param_name]
             weight_loader = param.weight_loader
+
             if fused:
+                # If the parameter is for w13 together, the corresponding weight
+                # will be a tuple, so we must select the correct weight
+                # depending on the shard id, which is either "w1" or "w3".
                 if "w13" in full_param_name:
+                    assert shard_id in ["w1", "w3"]
                     shard_idx = 0 if shard_id == "w1" else 1
                     new_loaded_weight = new_loaded_weight[shard_idx]
-                new_loaded_weight = new_loaded_weight.transpose(-1, -2)
+
+                # If EP (expert parallel) is enabled, update expert_id to the
+                # starting expert index for the current EP rank and extract the
+                # corresponding expert weights.
                 layer_idx = extract_layer_index(name)
-                # EP mapping
                 expert_map = self.layers[
                     layer_idx].feed_forward.experts.expert_map
                 if expert_map is not None:
@@ -382,6 +442,9 @@ class Llama4Model(LlamaModel):
             else:
                 # TODO: add EP support for non fused weights
                 pass
+
+            # Load the weight into the module parameter with corresponding
+            # shard id and expert id.
             weight_loader(param,
                           new_loaded_weight,
                           full_param_name,
@@ -390,10 +453,13 @@ class Llama4Model(LlamaModel):
 
             loaded_params.add(full_param_name)
             expert_param_loaded = True
+
         return expert_param_loaded
 
     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
+        # Name mapping from the parameter name to the shard name and
+        # corresponding shard id.
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             (".qkv_proj", ".q_proj", "q"),
@@ -402,26 +468,43 @@ class Llama4Model(LlamaModel):
             (".gate_up_proj", ".gate_proj", 0),
             (".gate_up_proj", ".up_proj", 1),
         ]
+        # Indicate whether the expert weights are fused into a single weight
+        # tensor.
         fused_experts_params = False
+        # Expert parameter mapping for the case where the expert weights are
+        # not fused into a single weight tensor.
         expert_params_mapping = FusedMoE.make_expert_params_mapping(
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
             ckpt_up_proj_name="up_proj",
             num_experts=self.num_experts)
+        # Expert parameter mapping for the case where the expert weights are
+        # fused into a single weight tensor.
         expert_params_mapping_fused = FusedMoE.make_expert_params_mapping(
             ckpt_gate_proj_name="gate_up_proj",
             ckpt_down_proj_name="down_proj",
             ckpt_up_proj_name="gate_up_proj",
             num_experts=1)
+        # All the module parameters.
         params_dict = dict(self.named_parameters())
+        # The module parameters that have been loaded.
         loaded_params: set[str] = set()
+
+        # Iterate over all the weights and load them into module parameters.
         for name, loaded_weight in weights:
+
+            # If the name contains "experts.gate_up_proj" or "experts.down_proj"
+            # without the expert indices, it means the expert weights are fused
+            # into a single weight tensor across all experts.
             if "experts.gate_up_proj" in name or "experts.down_proj" in name:
                 fused_experts_params = True
                 expert_params_mapping = expert_params_mapping_fused
+
+            # If kv cache quantization scales exist and the weight name
+            # corresponds to one of the kv cache quantization scales, load
+            # them.
             if (self.quant_config is not None and
                 (scale_name := self.quant_config.get_cache_scale(name))):
-                # Loading kv cache quantization scales
                 param = params_dict[scale_name]
                 weight_loader = getattr(param, "weight_loader",
                                         default_weight_loader)
@@ -430,84 +513,119 @@ class Llama4Model(LlamaModel):
                 weight_loader(param, loaded_weight)
                 loaded_params.add(scale_name)
                 continue
+
+            # Iterate over stacked_params_mapping to check if the current weight
+            # is one of the stacked parameters. If so, load the weight with the
+            # corresponding shard id. Note that MoE weights are handled
+            # separately in the else block.
             for param_name, weight_name, shard_id in stacked_params_mapping:
+                # Skip if the current weight is not one of the stacked
+                # parameters or if the current weight is a MoE weight.
                 if weight_name not in name or "experts" in name:
                     continue
-                # This check is for ModelOpt ckpts with kv cache quant enabled
+
+                # For ModelOpt checkpoints, we need to rename the self_attn
+                # weight/weight_scale names except for kv cache scales.
                 if not (name.endswith(
                     (".k_scale", ".v_scale")) and "self_attn" in name):
                     name = name.replace(weight_name, param_name)
+
+                # Skip if the current weight corresponds to a parameter that
+                # does not exist on the current PP (pipeline parallel) rank.
                 if is_pp_missing_parameter(name, self):
                     continue
-                if name.endswith("scale") and "expert" not in name:
-                    # Remapping the name of FP8 kv-scale.
+
+                # Remap kv cache scale names for ModelOpt checkpoints.
+                # TODO: ModelOpt should implement get_cache_scale() such that
+                #       kv cache scale name remapping can be done there.
+                if name.endswith("scale"):
                     name = maybe_remap_kv_scale_name(name, params_dict)
                     if name is None:
                         continue
+
+                # Load the weight into the module parameter with corresponding
+                # shard id and exit the for loop and the else block.
                 param = params_dict[name]
                 weight_loader = getattr(param, "weight_loader",
                                         default_weight_loader)
+
                 if weight_loader == default_weight_loader:
                     weight_loader(param, loaded_weight)
                 else:
                     weight_loader(param, loaded_weight, shard_id)
+
                 loaded_params.add(name)
                 break
+
+            # Handle normal (non-stacked) weights and MoE weights.
             else:
-                moe_loaded = self.load_moe_expert_weights(
-                    name,
-                    loaded_weight,
-                    params_dict,
-                    loaded_params,
-                    expert_params_mapping,
-                    fused=fused_experts_params)
+                # First, try to load MoE weights using load_moe_expert_weights.
+                # If successful, move on to next loaded weight.
+                if self.load_moe_expert_weights(name,
+                                                loaded_weight,
+                                                params_dict,
+                                                loaded_params,
+                                                expert_params_mapping,
+                                                fused=fused_experts_params):
+                    continue
 
-                if not moe_loaded:
-                    if is_pp_missing_parameter(name, self):
-                        continue
+                # Skip if the current weight corresponds to a parameter that
+                # does not exist on the current PP (pipeline parallel) rank.
+                if is_pp_missing_parameter(name, self):
+                    continue
 
-                    # Handle flat expert scale parameters that
-                    # don't match per-expert patterns
-                    if ("experts." in name and ("w13_input_scale" in name
-                                                or "w13_weight_scale" in name
-                                                or "w2_input_scale" in name
-                                                or "w2_weight_scale" in name)):
-                        # These are flat expert scales that apply to all experts
-                        param = params_dict[name]
-                        weight_loader = getattr(param, "weight_loader",
-                                                default_weight_loader)
-
-                        # Check for MoE-specific loading support via
-                        # attribute instead of expensive runtime reflection
-                        supports_moe = getattr(weight_loader,
-                                               'supports_moe_loading', False)
-
-                        if supports_moe:
-                            # This is a MoE weight loader
-                            if "w13_" in name:
-                                shard_id = "w1"
-                            elif "w2_" in name:
-                                shard_id = "w2"
-                            else:
-                                shard_id = "w1"
-
-                            weight_loader(param,
-                                          loaded_weight,
-                                          name,
-                                          shard_id=shard_id,
-                                          expert_id=0)
-                        else:
-                            # Regular weight loader (handles both
-                            # param.weight_loader and default_weight_loader)
-                            weight_loader(param, loaded_weight)
-                        loaded_params.add(name)
-                        continue
+                # Handle flat expert scale parameters that don't match
+                # per-expert patterns, i.e. one weight scale tensor for all
+                # experts.
+                scale_names = [
+                    "w13_input_scale", "w13_weight_scale", "w2_input_scale",
+                    "w2_weight_scale"
+                ]
+                if ("experts." in name and any(scale_name in name
+                                               for scale_name in scale_names)):
 
                     param = params_dict[name]
                     weight_loader = getattr(param, "weight_loader",
                                             default_weight_loader)
-                    weight_loader(param, loaded_weight)
+
+                    # Detect MoE-aware weight loaders via an attribute flag
+                    # rather than expensive runtime reflection.
+                    if getattr(weight_loader, 'supports_moe_loading', False):
+                        # Map the weight name to the corresponding shard id.
+                        shard_id = "w2" if "w2_" in name else "w1"
+
+                        # Transpose if weight scales are FP8 block scales with
+                        # three dimensions:
+                        # [num_experts, hidden_in, hidden_out].
+                        if name.endswith("weight_scale") \
+                            and loaded_weight.dtype == torch.float8_e4m3fn \
+                            and loaded_weight.ndim == 3:
+                            loaded_weight = loaded_weight.transpose(-1, -2)
+
+                        # Load the weight into the module parameter with
+                        # corresponding shard id and expert id.
+                        weight_loader(param,
+                                      loaded_weight,
+                                      name,
+                                      shard_id=shard_id,
+                                      expert_id=0)
+
+                    else:
+                        # Regular weight loader (handles both
+                        # param.weight_loader and default_weight_loader)
+                        weight_loader(param, loaded_weight)
+
                     loaded_params.add(name)
+                    continue
+
+                # Handle normal (non-stacked, non-MoE) weights.
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader",
+                                        default_weight_loader)
+                weight_loader(param, loaded_weight)
+                loaded_params.add(name)
+
+        # Finally, return the set of loaded parameters.
         return loaded_params
 
 
@@ -560,23 +678,43 @@ class Llama4ForCausalLM(LlamaForCausalLM):
         loaded_weight: torch.Tensor,
     ) -> tuple[str, torch.Tensor]:
 
-        def permute(w: torch.Tensor, n_heads: int):
+        # Helper function to permute the weight's channels
+        def permute(w: torch.Tensor, n_heads: int, is_weight_scale: bool):
+
+            # Calculate the expected shape of the weight.
+            # Do not rely on w's shape, as it may be in another layout.
             attn_in = self.config.head_dim * n_heads
             attn_out = self.config.hidden_size
 
+            # If the weight is FP4 packed as uint8, we need to divide attn_out
+            # by 2.
+            if w.dtype == torch.uint8 and w.shape[1] * 2 == attn_out:
+                attn_out = attn_out // 2
+
+            # If the weight is a weight scale, we need to divide attn_out by
+            # block size, which is currently 16.
+            elif w.dtype == torch.float8_e4m3fn and is_weight_scale \
+                and w.shape[1] * 16 == attn_out:
+                attn_out = attn_out // 16
+
             return w.view(n_heads, attn_in // n_heads // 2, 2,
                           attn_out).transpose(1, 2).reshape(attn_in, attn_out)
 
         modules = name.split(".")
 
-        # rotary embeds should be sliced
-        if ("wk" in modules or "k_proj" in modules) \
-           and modules[-1] == "weight":
-            loaded_weight = permute(loaded_weight,
-                                    self.config.num_key_value_heads)
-        elif ("wq" in modules or "q_proj" in modules) \
-                and modules[-1] == "weight":
-            loaded_weight = permute(loaded_weight,
-                                    self.config.num_attention_heads)
+        # Permute Q/K weights and weight block scales for rotary embedding
+        is_weight = modules[-1] == "weight"
+        is_nvfp4_weight_scale = (modules[-1] == "weight_scale" and
+                                 loaded_weight.dtype == torch.float8_e4m3fn)
+
+        if is_weight or is_nvfp4_weight_scale:
+            if ("wk" in modules or "k_proj" in modules):
+                loaded_weight = permute(loaded_weight,
+                                        self.config.num_key_value_heads,
+                                        is_nvfp4_weight_scale)
+            elif ("wq" in modules or "q_proj" in modules):
+                loaded_weight = permute(loaded_weight,
+                                        self.config.num_attention_heads,
+                                        is_nvfp4_weight_scale)
 
         return name, loaded_weight

From 88edf5994c123314cc3b18621352dd118bec2b99 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Wed, 30 Jul 2025 15:35:08 +0100
Subject: [PATCH 065/224] [Docs] Reduce the size of the built docs (#21920)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 mkdocs.yaml           | 7 +++++++
 requirements/docs.txt | 1 +
 2 files changed, 8 insertions(+)

diff --git a/mkdocs.yaml b/mkdocs.yaml
index 78f1c5b77cd07..e5b7454003310 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -67,6 +67,13 @@ plugins:
       exclude:
         - argparse/*
         - examples/*
+  - minify:
+      minify_html: true
+      minify_js: true
+      minify_css: true
+      cache_safe: true
+      js_files: [docs/mkdocs/javascript/*.js]
+      css_files: [docs/mkdocs/stylesheets/*.css]
   # For API reference generation
   - api-autonav:
       modules: ["vllm"]
diff --git a/requirements/docs.txt b/requirements/docs.txt
index 9e56c9573b33b..4d4fc7da6816d 100644
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -6,6 +6,7 @@ mkdocs-gen-files
 mkdocs-awesome-nav
 mkdocs-glightbox
 mkdocs-git-revision-date-localized-plugin
+mkdocs-minify-plugin
 python-markdown-math
 regex
 ruff

From 6e599eebe8655dab75462a8a165f6d811d0d845f Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Wed, 30 Jul 2025 22:35:47 +0800
Subject: [PATCH 066/224] [Bugfix] Fix OOM tests in initialization test
 (#21921)

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 tests/models/test_initialization.py   | 14 ++++++++------
 vllm/model_executor/models/glm4_1v.py |  1 +
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py
index d5441540176e8..4c7da24fca32a 100644
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -33,12 +33,6 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
     model_info.check_available_online(on_fail="skip")
     model_info.check_transformers_version(on_fail="skip")
 
-    # FIXME: Possible memory leak in the previous tests?
-    if model_arch in ("Glm4vForConditionalGeneration",
-                      "GraniteSpeechForConditionalGeneration",
-                      "KimiVLForConditionalGeneration"):
-        pytest.skip("Avoid OOM")
-
     if model_arch in ("Llama4ForCausalLM", "EagleLlama4ForCausalLM"):
         from vllm.model_executor.models.llama4 import Llama4ForCausalLM
         from vllm.model_executor.models.registry import ModelRegistry
@@ -87,6 +81,14 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
                 "num_hidden_layers": 1,
             })
 
+        # e.g.: Qwen/Qwen2-Audio-7B-Instruct
+        if hasattr(hf_config, "audio_config"):
+            hf_config.audio_config.update({
+                "num_layers": 1,
+                "num_hidden_layers": 1,
+                "encoder_layers": 1,
+            })
+
         return hf_config
 
     # Avoid calling model.forward()
diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 1fd65cc9099b7..ae1bf22c704e5 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -1275,6 +1275,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
             vllm_config=vllm_config,
             prefix=maybe_prefix(prefix, ""),
             architectures=["Glm4ForCausalLM"],
+            hf_config=self.config.get_text_config(),
         )
 
         self.make_empty_intermediate_tensors = (

From 366f6b3a4d92ee0b2df8e5620a88ddf57afc3681 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Wed, 30 Jul 2025 23:42:05 +0800
Subject: [PATCH 067/224] [Bugfix] Fix multi-api server not working for text
 models (#21933)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/config.py | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 8e8c1198833c2..012a791a3c872 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -856,7 +856,7 @@ class ModelConfig:
             self.tokenizer = s3_tokenizer.dir
 
     def _init_multimodal_config(self) -> Optional["MultiModalConfig"]:
-        if self.registry.is_multimodal_model(self.architectures, self):
+        if self._model_info.supports_multimodal:
             return MultiModalConfig(
                 limit_per_prompt=self.limit_mm_per_prompt,
                 media_io_kwargs=self.media_io_kwargs,
@@ -865,19 +865,6 @@ class ModelConfig:
                 disable_mm_preprocessor_cache,
                 interleave_mm_strings=self.interleave_mm_strings)
 
-        if self.limit_mm_per_prompt:
-            raise ValueError("`limit_mm_per_prompt` is only supported for "
-                             "multimodal models.")
-        if self.mm_processor_kwargs:
-            raise ValueError("`mm_processor_kwargs` is only supported for "
-                             "multimodal models.")
-        if self.disable_mm_preprocessor_cache:
-            raise ValueError("`disable_mm_preprocessor_cache` is only "
-                             "supported for multimodal models.")
-        if self.interleave_mm_strings:
-            raise ValueError("`interleave_mm_strings` is only "
-                             "supported for multimodal models.")
-
         return None
 
     def _get_encoder_config(self):

From ad510309ee10e5182b99ee94ddc5ace716c65050 Mon Sep 17 00:00:00 2001
From: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com>
Date: Wed, 30 Jul 2025 08:54:15 -0700
Subject: [PATCH 068/224] Override attention metadata for fast prefill in some
 KV sharing setups (#21590)

Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
---
 tests/v1/e2e/test_kv_sharing_fast_prefill.py | 143 +++++++++++++++++++
 vllm/config.py                               |  15 ++
 vllm/engine/arg_utils.py                     |   6 +
 vllm/model_executor/models/gemma3n.py        |   1 +
 vllm/v1/attention/backends/utils.py          |  35 ++++-
 vllm/v1/worker/gpu_model_runner.py           | 113 +++++++++++----
 6 files changed, 287 insertions(+), 26 deletions(-)
 create mode 100644 tests/v1/e2e/test_kv_sharing_fast_prefill.py

diff --git a/tests/v1/e2e/test_kv_sharing_fast_prefill.py b/tests/v1/e2e/test_kv_sharing_fast_prefill.py
new file mode 100644
index 0000000000000..616fc7a860599
--- /dev/null
+++ b/tests/v1/e2e/test_kv_sharing_fast_prefill.py
@@ -0,0 +1,143 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import gc
+import random
+from typing import Optional, Union
+
+import pytest
+import torch
+
+from vllm import LLM, SamplingParams
+from vllm.config import CompilationConfig, CompilationLevel
+from vllm.forward_context import get_forward_context
+from vllm.model_executor.models.gemma3n import Gemma3nForConditionalGeneration
+from vllm.model_executor.models.registry import ModelRegistry
+from vllm.model_executor.models.utils import extract_layer_index
+from vllm.sequence import IntermediateTensors
+
+from ...utils import fork_new_process_for_each_test
+
+
+class TestGemma3nForConditionalGeneration(Gemma3nForConditionalGeneration):
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: Optional[IntermediateTensors] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[torch.Tensor, IntermediateTensors]:
+        hidden_states = self.model(input_ids, positions, intermediate_tensors,
+                                   inputs_embeds, **kwargs)
+        attn_metadata = get_forward_context().attn_metadata
+        # attn_metadata is None during dummy runs
+        if (attn_metadata is not None
+                and self.cache_config.kv_sharing_fast_prefill):
+            assert isinstance(attn_metadata, dict)  # true in V1
+            # Gemma3n-E2B has 30 layers, with last 20 layers being
+            # cross-decoder layers. Check attention metadata is correct
+            for layer_name, metadata in attn_metadata.items():
+                layer_idx = extract_layer_index(layer_name)
+                if layer_idx >= 20:
+                    assert hasattr(metadata, 'logits_indices_padded')
+                    assert hasattr(metadata, 'num_logits_indices')
+                else:
+                    assert not hasattr(metadata, 'logits_indices_padded')
+                    assert not hasattr(metadata, 'num_logits_indices')
+
+            # Last layer will be a KV sharing layer
+            layer_attn_metadata = attn_metadata[
+                self.model.language_model.layers[-1].self_attn.attn.layer_name]
+            logits_indices_padded = (layer_attn_metadata.logits_indices_padded)
+            assert logits_indices_padded is not None
+            num_logits_indices = layer_attn_metadata.num_logits_indices
+            assert num_logits_indices > 0
+            # Reset hidden states to random values and
+            # only set logits at logits_indices to valid values
+            # Because logits_indices are the only positions that are used
+            # for output token sampling, this still produces same outputs
+            logits_hs = hidden_states[logits_indices_padded]
+            hidden_states = torch.randn_like(hidden_states)
+            gen_indices = logits_indices_padded[:num_logits_indices]
+            hidden_states[gen_indices] = logits_hs[:num_logits_indices]
+
+        return hidden_states
+
+
+@pytest.fixture
+def test_prompts():
+    """
+    Adapted from tests/v1/e2e/test_spec_decode.py
+    """
+    prompt_types = ["repeat", "sentence"]
+    # Setting higher num prompts increases the chance of numerics mismatch
+    # due to matrix multiplication numerics depending on batch dimension
+    num_prompts = 10
+    prompts = []
+
+    random.seed(0)
+    random_prompt_type_choices = random.choices(prompt_types, k=num_prompts)
+
+    for kind in random_prompt_type_choices:
+        word_choices = ["test", "temp", "hello", "where"]
+        word = random.choice(word_choices)
+        if kind == "repeat":
+            prompt = f"""please repeat the word '{word}' 10 times."""
+        elif kind == "sentence":
+            prompt = f"""please give a ten-word sentence that
+            uses the word {word} at least once."""
+        else:
+            raise ValueError(f"Unknown prompt type: {kind}")
+        prompts.append(prompt)
+
+    return prompts
+
+
+@fork_new_process_for_each_test
+@pytest.mark.parametrize("enforce_eager", [True, False])
+def test_kv_sharing_fast_prefill(
+    monkeypatch: pytest.MonkeyPatch,
+    enforce_eager: bool,
+    test_prompts: list[str],
+):
+    ModelRegistry.register_model("Gemma3nForConditionalGeneration",
+                                 TestGemma3nForConditionalGeneration)
+    sampling_params = SamplingParams(temperature=0.0, max_tokens=100)
+    compilation_config = CompilationConfig(
+        # This allows vLLM compilation backend to handle allocating and
+        # managing buffers for cudagraph
+        cudagraph_copy_inputs=True,
+        level=CompilationLevel.PIECEWISE
+        if not enforce_eager else CompilationLevel.NO_COMPILATION)
+
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_V1", "1")
+
+        llm = LLM(
+            model="google/gemma-3n-E2B-it",
+            enforce_eager=enforce_eager,
+            compilation_config=compilation_config,
+        )
+        ref_responses = llm.generate(test_prompts, sampling_params)
+
+        del llm
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        llm = LLM(model="google/gemma-3n-E2B-it",
+                  enforce_eager=enforce_eager,
+                  compilation_config=compilation_config,
+                  kv_sharing_fast_prefill=True)
+        optimized_responses = llm.generate(test_prompts, sampling_params)
+
+        misses = 0
+
+        for ref_response, optimized_response in zip(ref_responses,
+                                                    optimized_responses):
+            if ref_response.outputs[0].text != optimized_response.outputs[
+                    0].text:
+                misses += 1
+
+        assert misses == 0
diff --git a/vllm/config.py b/vllm/config.py
index 012a791a3c872..a330bafb76332 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1795,6 +1795,16 @@ class CacheConfig:
     num_cpu_blocks: Optional[int] = field(default=None, init=False)
     """The number of blocks to allocate for CPU memory."""
 
+    kv_sharing_fast_prefill: bool = False
+    """This feature is work in progress and no prefill optimization takes place
+    with this flag enabled currently.
+
+    In some KV sharing setups, e.g. YOCO (https://arxiv.org/abs/2405.05254),
+    some layers can skip tokens corresponding to prefill. This flag enables
+    attention metadata for eligible layers to be overridden with metadata
+    necessary for implementing this optimization in some models (e.g. Gemma3n).
+    """
+
     def compute_hash(self) -> str:
         """
         WARNING: Whenever a new field is added to this config,
@@ -1836,6 +1846,11 @@ class CacheConfig:
                 "GPU memory utilization must be less than 1.0. Got "
                 f"{self.gpu_memory_utilization}.")
 
+        if self.kv_sharing_fast_prefill:
+            logger.warning_once(
+                "--kv-sharing-fast-prefill is currently work in progress "
+                "and not functional yet (i.e. no prefill savings)")
+
         return self
 
     def _verify_cache_dtype(self) -> None:
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 6bdc3c361af34..ababa49a53ae4 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -445,6 +445,9 @@ class EngineArgs:
     # DEPRECATED
     enable_prompt_adapter: bool = False
 
+    kv_sharing_fast_prefill: bool = \
+        CacheConfig.kv_sharing_fast_prefill
+
     def __post_init__(self):
         # support `EngineArgs(compilation_config={...})`
         # without having to manually construct a
@@ -697,6 +700,8 @@ class EngineArgs:
                                  **cache_kwargs["cpu_offload_gb"])
         cache_group.add_argument("--calculate-kv-scales",
                                  **cache_kwargs["calculate_kv_scales"])
+        cache_group.add_argument("--kv-sharing-fast-prefill",
+                                 **cache_kwargs["kv_sharing_fast_prefill"])
 
         # Multimodal related configs
         multimodal_kwargs = get_kwargs(MultiModalConfig)
@@ -1069,6 +1074,7 @@ class EngineArgs:
             prefix_caching_hash_algo=self.prefix_caching_hash_algo,
             cpu_offload_gb=self.cpu_offload_gb,
             calculate_kv_scales=self.calculate_kv_scales,
+            kv_sharing_fast_prefill=self.kv_sharing_fast_prefill,
         )
 
         # Get the current placement group if Ray is initialized and
diff --git a/vllm/model_executor/models/gemma3n.py b/vllm/model_executor/models/gemma3n.py
index d0880103d4e86..a58b32793dbef 100644
--- a/vllm/model_executor/models/gemma3n.py
+++ b/vllm/model_executor/models/gemma3n.py
@@ -793,6 +793,7 @@ class Gemma3nForConditionalGeneration(nn.Module):
         del lora_config  # Unused.
         super().__init__()
         self.config = config
+        self.cache_config = vllm_config.cache_config
         self.model = Gemma3nModel(vllm_config=vllm_config,
                                   prefix=maybe_prefix(prefix, "model"))
         self.logits_processor = LogitsProcessor(
diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index d1599ba10b618..36bacf0cb36f8 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -3,8 +3,8 @@
 import abc
 import functools
 from abc import abstractmethod
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, ClassVar, Generic, Optional, TypeVar
+from dataclasses import dataclass, make_dataclass
+from typing import TYPE_CHECKING, Any, ClassVar, Generic, Optional, TypeVar
 
 import numpy as np
 import torch
@@ -508,3 +508,34 @@ def reorder_batch_to_split_decodes_and_prefills(
         modified_batch = True
 
     return modified_batch
+
+
+KV_SHARING_FAST_PREFILL_METADATA_FIELDS = [
+    ('logits_indices_padded', Optional[torch.Tensor], None),
+    ('num_logits_indices', int, 0),
+]
+
+
+def subclass_attention_metadata(
+    name_prefix: str,
+    metadata_cls: Any,
+    fields: list[tuple[str, Any, Any]],
+) -> Any:
+    """
+    Return a new subclass of `metadata_cls` with additional fields
+    """
+    name: str = name_prefix + metadata_cls.__name__  # type: ignore
+    Wrapped = make_dataclass(name, fields, bases=(metadata_cls, ))
+    return Wrapped
+
+
+def make_kv_sharing_fast_prefill_attention_metadata(
+    metadata_cls: Any, ) -> Any:
+    """
+    Return a new subclass of `metadata_cls` for fast prefill
+    """
+    return subclass_attention_metadata(
+        name_prefix="KVSharingFastPrefill",
+        metadata_cls=metadata_cls,
+        fields=KV_SHARING_FAST_PREFILL_METADATA_FIELDS,
+    )
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 3befb6adf2753..987ef22a1b7fb 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import dataclasses
 import gc
 import time
 from contextlib import contextmanager
@@ -47,6 +48,7 @@ from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
 from vllm.v1.attention.backends.mamba_selectors import get_mamba_attn_backend
 from vllm.v1.attention.backends.utils import (
     AttentionMetadataBuilder, CommonAttentionMetadata,
+    make_kv_sharing_fast_prefill_attention_metadata,
     make_local_attention_virtual_batches)
 from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
 from vllm.v1.kv_cache_interface import (AttentionSpec,
@@ -320,6 +322,12 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         # means this layer will perform attention using the keys and values
         # from the KV cache of `shared_kv_cache_layers[layer_name]`.
         self.shared_kv_cache_layers: dict[str, str] = {}
+        self.kv_sharing_fast_prefill_eligible_layers: set[str] = set()
+
+        self.kv_sharing_fast_prefill_logits_indices = None
+        if self.cache_config.kv_sharing_fast_prefill:
+            self.kv_sharing_fast_prefill_logits_indices = torch.zeros(
+                self.max_num_tokens, dtype=torch.int32, device=self.device)
 
     def _may_reorder_batch(self, scheduler_output: "SchedulerOutput") -> None:
         """
@@ -735,6 +743,55 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
 
         spec_decode_common_attn_metadata = None
 
+        use_spec_decode = len(
+            scheduler_output.scheduled_spec_decode_tokens) > 0
+        if not use_spec_decode:
+            # NOTE(woosuk): Due to chunked prefills, the batch may contain
+            # partial requests. While we should not sample any token
+            # from these partial requests, we do so for simplicity.
+            # We will ignore the sampled tokens from the partial requests.
+            # TODO: Support prompt logprobs.
+            logits_indices = query_start_loc[1:] - 1
+            spec_decode_metadata = None
+        else:
+            # Get the number of draft tokens for each request.
+            # Iterate over the dictionary rather than all requests since not all
+            # requests have draft tokens.
+            num_draft_tokens = np.zeros(num_reqs, dtype=np.int32)
+            for req_id, draft_token_ids in (
+                    scheduler_output.scheduled_spec_decode_tokens.items()):
+                req_idx = self.input_batch.req_id_to_index[req_id]
+                num_draft_tokens[req_idx] = len(draft_token_ids)
+
+            spec_decode_metadata = self._calc_spec_decode_metadata(
+                num_draft_tokens, cu_num_tokens)
+            logits_indices = spec_decode_metadata.logits_indices
+
+        logits_indices_padded = None
+        if self.cache_config.kv_sharing_fast_prefill:
+            assert self.kv_sharing_fast_prefill_logits_indices is not None
+            num_logits = logits_indices.shape[0]
+            assert num_logits > 0
+            self.kv_sharing_fast_prefill_logits_indices[:num_logits].copy_(
+                logits_indices)
+            # There might be leftover indices past num_logits in the buffer
+            # from previous iterations, whose values may be greater than the
+            # batch size in the current iteration. To ensure indices are always
+            # valid, we fill the padded indices with the last index.
+            self.kv_sharing_fast_prefill_logits_indices[num_logits:].fill_(
+                logits_indices[-1].item())
+            if (self.use_cuda_graph
+                    and num_logits <= self.cudagraph_batch_sizes[-1]):
+                # Use piecewise CUDA graphs.
+                # Add padding to the batch size.
+                num_logits_padded = self.vllm_config.pad_for_cudagraph(
+                    num_logits)
+            else:
+                num_logits_padded = num_logits
+            logits_indices_padded = (
+                self.kv_sharing_fast_prefill_logits_indices[:num_logits_padded]
+            )
+
         attn_metadata: dict[str, Any] = {}
 
         # Prepare encoder attention metadata separately
@@ -806,7 +863,28 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 common_attn_metadata=common_attn_metadata,
             ))
 
+            fast_prefill_metadata = attn_metadata_i
+            if (self.cache_config.kv_sharing_fast_prefill
+                    and self.kv_sharing_fast_prefill_eligible_layers):
+                # Dynamically create a dataclass type that inherits
+                # from attention metadata type but includes additional
+                # fields logits_indices_padded and num_logits_indices
+                # which are required for prefill truncation
+                fast_prefill_metadata_type = (
+                    make_kv_sharing_fast_prefill_attention_metadata(
+                        metadata_cls=type(attn_metadata_i), ))
+                fast_prefill_metadata = fast_prefill_metadata_type(
+                    **dataclasses.asdict(attn_metadata_i),
+                    logits_indices_padded=logits_indices_padded,
+                    num_logits_indices=logits_indices.size(0),
+                )
+
             for layer_name in kv_cache_group_spec.layer_names:
+                if (self.cache_config.kv_sharing_fast_prefill and layer_name
+                        in self.kv_sharing_fast_prefill_eligible_layers):
+                    attn_metadata[layer_name] = fast_prefill_metadata
+                    continue
+
                 attn_metadata[layer_name] = attn_metadata_i
 
             # Hack for now to fix chunked local attention + no hybrid kv cache
@@ -838,30 +916,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             b.can_run_in_cudagraph(common_attn_metadata)
             for b in self.attn_metadata_builders)
 
-        use_spec_decode = len(
-            scheduler_output.scheduled_spec_decode_tokens) > 0
-        if not use_spec_decode:
-            # NOTE(woosuk): Due to chunked prefills, the batch may contain
-            # partial requests. While we should not sample any token
-            # from these partial requests, we do so for simplicity.
-            # We will ignore the sampled tokens from the partial requests.
-            # TODO: Support prompt logprobs.
-            logits_indices = query_start_loc[1:] - 1
-            spec_decode_metadata = None
-        else:
-            # Get the number of draft tokens for each request.
-            # Iterate over the dictionary rather than all requests since not all
-            # requests have draft tokens.
-            num_draft_tokens = np.zeros(num_reqs, dtype=np.int32)
-            for req_id, draft_token_ids in (
-                    scheduler_output.scheduled_spec_decode_tokens.items()):
-                req_idx = self.input_batch.req_id_to_index[req_id]
-                num_draft_tokens[req_idx] = len(draft_token_ids)
-
-            spec_decode_metadata = self._calc_spec_decode_metadata(
-                num_draft_tokens, cu_num_tokens)
-            logits_indices = spec_decode_metadata.logits_indices
-
         # Hot-Swap lora model
         if self.lora_config:
             self.set_active_loras(self.input_batch, num_scheduled_tokens)
@@ -1433,6 +1487,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
          spec_decode_metadata, num_scheduled_tokens_np,
          spec_decode_common_attn_metadata) = (
              self._prepare_inputs(scheduler_output))
+
         num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
         if (self.use_cuda_graph
                 and num_scheduled_tokens <= self.cudagraph_batch_sizes[-1]):
@@ -2814,6 +2869,16 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 kv_cache_config.kv_cache_groups,
                 kv_caches,
             )
+            attn_layers = get_layers_from_vllm_config(self.vllm_config,
+                                                      Attention)
+            # Iterate in reversed order and add layers that re-use KV cache
+            # e.g. in YOCO-like KV sharing setups (e.g. Gemma3n)
+            for layer_name in reversed(attn_layers):
+                if layer_name in self.shared_kv_cache_layers:
+                    self.kv_sharing_fast_prefill_eligible_layers.add(
+                        layer_name)
+                else:
+                    break
 
         bind_kv_cache(kv_caches,
                       self.compilation_config.static_forward_context,

From 5c765aec65d0f978cc2ad42164a5da2d3e0cf071 Mon Sep 17 00:00:00 2001
From: 633WHU <cliu_whu@yeah.net>
Date: Wed, 30 Jul 2025 23:54:44 +0800
Subject: [PATCH 069/224] [Bugfix] Fix TypeError in scheduler when comparing
 mixed request_id types (#21816)

Signed-off-by: chiliu <chiliu@paypal.com>
Co-authored-by: chiliu <chiliu@paypal.com>
---
 tests/v1/engine/test_engine_core.py | 72 +++++++++++++++++++++++------
 vllm/v1/engine/core.py              |  5 ++
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/tests/v1/engine/test_engine_core.py b/tests/v1/engine/test_engine_core.py
index bbdc73e9608a1..eb826bf06236f 100644
--- a/tests/v1/engine/test_engine_core.py
+++ b/tests/v1/engine/test_engine_core.py
@@ -236,7 +236,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
     Test that the engine can handle multiple concurrent batches.
     """
 
-    def make_request_with_max_tokens(req_id: int,
+    def make_request_with_max_tokens(req_id: str,
                                      max_tokens: int) -> EngineCoreRequest:
         request = make_request()
         request.request_id = req_id
@@ -297,16 +297,16 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         assert engine_core.batch_queue is not None
 
         # Add two requests in a row. Each request have 12 prompt tokens.
-        req0 = make_request_with_max_tokens(0, 5)
+        req0 = make_request_with_max_tokens("0", 5)
         engine_core.add_request(req0)
-        req1 = make_request_with_max_tokens(1, 5)
+        req1 = make_request_with_max_tokens("1", 5)
         engine_core.add_request(req1)
 
         # Schedule Batch 1: (10, req0)
         assert engine_core.step_with_batch_queue()[0] is None
         assert engine_core.batch_queue.qsize() == 1
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[0] == 10
+        assert scheduler_output.num_scheduled_tokens["0"] == 10
         # num_computed_tokens should have been updated immediately.
         assert engine_core.scheduler.requests[
             req0.request_id].num_computed_tokens == 10
@@ -315,11 +315,11 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         assert engine_core.step_with_batch_queue()[0] is None
         assert engine_core.batch_queue.qsize() == 2
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[0] == 2
-        assert scheduler_output.num_scheduled_tokens[1] == 8
+        assert scheduler_output.num_scheduled_tokens["0"] == 2
+        assert scheduler_output.num_scheduled_tokens["1"] == 8
         # num_computed_tokens should have been updated immediately.
-        assert engine_core.scheduler.requests[0].num_computed_tokens == 12
-        assert engine_core.scheduler.requests[1].num_computed_tokens == 8
+        assert engine_core.scheduler.requests["0"].num_computed_tokens == 12
+        assert engine_core.scheduler.requests["1"].num_computed_tokens == 8
 
         assert engine_core.scheduler.get_num_unfinished_requests() == 2
 
@@ -331,7 +331,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         engine_core.step_with_batch_queue()
         assert engine_core.batch_queue.qsize() == 2
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[1] == 4
+        assert scheduler_output.num_scheduled_tokens["1"] == 4
 
         # Batch queue is full. Finish Batch 2. Get first token of req0.
         output = engine_core.step_with_batch_queue()[0].get(0)
@@ -343,7 +343,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         engine_core.step_with_batch_queue()
         assert engine_core.batch_queue.qsize() == 2
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[0] == 1
+        assert scheduler_output.num_scheduled_tokens["0"] == 1
 
         # Batch queue is full. Finish Batch 3. Get first token of req1.
         output = engine_core.step_with_batch_queue()[0].get(0)
@@ -355,14 +355,14 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         engine_core.step_with_batch_queue()
         assert engine_core.batch_queue.qsize() == 2
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[1] == 1
+        assert scheduler_output.num_scheduled_tokens["1"] == 1
 
         # Loop until req0 is finished.
         step = 0
         req_id = 0
         expected_num_tokens = [
-            engine_core.scheduler.requests[0].num_tokens + 1,
-            engine_core.scheduler.requests[1].num_tokens + 1,
+            engine_core.scheduler.requests["0"].num_tokens + 1,
+            engine_core.scheduler.requests["1"].num_tokens + 1,
         ]
         while engine_core.scheduler.get_num_unfinished_requests() == 2:
             output = engine_core.step_with_batch_queue()[0]
@@ -413,3 +413,49 @@ def test_engine_core_tp(monkeypatch: pytest.MonkeyPatch):
             get_worker_cache_config_field, args=("num_cpu_blocks", ))
         assert all(x is not None for x in num_gpu_blocks)
         assert all(x is not None for x in num_cpu_blocks)
+
+
+@create_new_process_for_each_test()
+def test_engine_core_invalid_request_id_type(monkeypatch: pytest.MonkeyPatch):
+    """Test that engine raises TypeError for non-string request_id."""
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_V1", "1")
+
+        engine_args = EngineArgs(model=MODEL_NAME)
+        vllm_config = engine_args.create_engine_config()
+        executor_class = Executor.get_class(vllm_config)
+
+        with set_default_torch_num_threads(1):
+            engine_core = EngineCore(vllm_config=vllm_config,
+                                     executor_class=executor_class,
+                                     log_stats=True)
+
+        # Test with UUID object (common mistake)
+        uuid_request = make_request()
+        uuid_request.request_id = uuid.uuid4()  # UUID object instead of string
+
+        with pytest.raises(TypeError,
+                           match="request_id must be a string, got.*UUID"):
+            engine_core.add_request(uuid_request)
+
+        # Test with integer
+        int_request = make_request()
+        int_request.request_id = 12345
+
+        with pytest.raises(TypeError,
+                           match="request_id must be a string, got.*int"):
+            engine_core.add_request(int_request)
+
+        # Test with None
+        none_request = make_request()
+        none_request.request_id = None
+
+        with pytest.raises(TypeError,
+                           match="request_id must be a string, got.*NoneType"):
+            engine_core.add_request(none_request)
+
+        # Verify engine is still functional after errors
+        valid_request = make_request()
+        engine_core.add_request(valid_request)
+        assert len(engine_core.scheduler.waiting) == 1
+        assert len(engine_core.scheduler.running) == 0
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index cad93061e65b0..39fda521f36af 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -207,6 +207,11 @@ class EngineCore:
 
     def add_request(self, request: EngineCoreRequest):
         """Add request to the scheduler."""
+        # Validate the request_id type.
+        if not isinstance(request.request_id, str):
+            raise TypeError(
+                f"request_id must be a string, got {type(request.request_id)}")
+
         if pooling_params := request.pooling_params:
             supported_pooling_tasks = [
                 task for task in self.get_supported_tasks()

From 004203e95330ac9a878df8192619570b0770667e Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Thu, 31 Jul 2025 00:10:41 +0800
Subject: [PATCH 070/224] [CI/Build] Fix registry tests (#21934)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 tests/models/registry.py                    | 16 +++++++----
 vllm/model_executor/models/mpt.py           | 20 ++++++-------
 vllm/model_executor/models/telechat2.py     | 15 ++++++++--
 vllm/transformers_utils/config.py           |  5 ++--
 vllm/transformers_utils/configs/__init__.py |  2 ++
 vllm/transformers_utils/configs/nvlm_d.py   | 31 +++++++++++++++++++++
 6 files changed, 70 insertions(+), 19 deletions(-)
 create mode 100644 vllm/transformers_utils/configs/nvlm_d.py

diff --git a/tests/models/registry.py b/tests/models/registry.py
index caa691039fce3..8fcff5a8c5113 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -170,8 +170,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                             min_transformers_version="4.54"),
     "Ernie4_5_MoeForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-21B-A3B-PT",
                                                min_transformers_version="4.54"),
-    "ExaoneForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"),  # noqa: E501
-    "Exaone4ForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-4.0-32B"),  # noqa: E501
+    "ExaoneForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
+                                         trust_remote_code=True),
+    "Exaone4ForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-4.0-32B",
+                                          min_transformers_version="4.54"),
     "Fairseq2LlamaForCausalLM": _HfExamplesInfo("mgleize/fairseq2-dummy-Llama-3.2-1B"),  # noqa: E501
     "FalconForCausalLM": _HfExamplesInfo("tiiuae/falcon-7b"),
     "FalconH1ForCausalLM":_HfExamplesInfo("tiiuae/Falcon-H1-0.5B-Base",
@@ -199,8 +201,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                              trust_remote_code=True),
     "HunYuanMoEV1ForCausalLM": _HfExamplesInfo("tencent/Hunyuan-A13B-Instruct",
                                                trust_remote_code=True),
+    # TODO: Remove is_available_online once their config.json is fixed
     "HunYuanDenseV1ForCausalLM":_HfExamplesInfo("tencent/Hunyuan-7B-Instruct-0124",
-                                               trust_remote_code=True),
+                                                trust_remote_code=True,
+                                                is_available_online=False),
     "HCXVisionForCausalLM": _HfExamplesInfo(
         "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
         trust_remote_code=True),
@@ -275,7 +279,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
     "StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),  # noqa: E501
     "StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
     "Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
-    "SolarForCausalLM": _HfExamplesInfo("upstage/solar-pro-preview-instruct"),
+    "SolarForCausalLM": _HfExamplesInfo("upstage/solar-pro-preview-instruct",
+                                        trust_remote_code=True),
     "TeleChat2ForCausalLM": _HfExamplesInfo("Tele-AI/TeleChat2-3B",
                                             trust_remote_code=True),
     "TeleFLMForCausalLM": _HfExamplesInfo("CofeAI/FLM-2-52B-Instruct-2407",
@@ -449,7 +454,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                                                           max_model_len=4096),
     "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B"),
     "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B-AWQ"),  # noqa: E501
-    "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B"),
+    "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B",
+                                           trust_remote_code=True),
     "SmolVLMForConditionalGeneration": _HfExamplesInfo("HuggingFaceTB/SmolVLM2-2.2B-Instruct"),  # noqa: E501
     "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b",  # noqa: E501
                                      trust_remote_code=True),
diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py
index c243f575ae54a..8db52a69924c9 100644
--- a/vllm/model_executor/models/mpt.py
+++ b/vllm/model_executor/models/mpt.py
@@ -8,7 +8,7 @@ from typing import Optional, Union
 
 import torch
 import torch.nn as nn
-from transformers import PretrainedConfig
+from transformers import MptConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -50,7 +50,7 @@ class MPTAttention(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: MptConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -59,15 +59,15 @@ class MPTAttention(nn.Module):
         self.d_model = config.d_model
         self.total_num_heads = config.n_heads
         self.head_dim = self.d_model // self.total_num_heads
-        self.clip_qkv = config.attn_config["clip_qkv"]
-        self.qk_ln = config.attn_config["qk_ln"]
-        self.alibi_bias_max = config.attn_config["alibi_bias_max"]
+        self.clip_qkv = config.attn_config.clip_qkv
+        self.qk_ln = config.attn_config.qk_ln
+        self.alibi_bias_max = config.attn_config.alibi_bias_max
         if "kv_n_heads" in config.attn_config:
-            self.total_num_kv_heads = config.attn_config['kv_n_heads']
+            self.total_num_kv_heads = config.attn_config.kv_n_heads
         else:
             self.total_num_kv_heads = self.total_num_heads
-        assert not config.attn_config["prefix_lm"]
-        assert config.attn_config["alibi"]
+        assert not config.attn_config.prefix_lm
+        assert config.attn_config.alibi
 
         # pylint: disable=invalid-name
         self.Wqkv = QKVParallelLinear(
@@ -144,7 +144,7 @@ class MPTMLP(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: MptConfig,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -176,7 +176,7 @@ class MPTBlock(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: MptConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
diff --git a/vllm/model_executor/models/telechat2.py b/vllm/model_executor/models/telechat2.py
index f0b31b1332fb1..49a7677151a94 100644
--- a/vllm/model_executor/models/telechat2.py
+++ b/vllm/model_executor/models/telechat2.py
@@ -37,9 +37,20 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
 class TeleChat2Model(LlamaModel):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        hf_config = vllm_config.model_config.hf_config
+
+        vllm_config.model_config.hf_config.attribute_map = {
+            "num_hidden_layers": "n_layer",
+            "num_attention_heads": "n_head",
+            "intermediate_size": "ffn_hidden_size",
+            "rms_norm_eps": "layer_norm_epsilon"
+        }
+        vllm_config.model_config.hf_config.hidden_act = "silu"
+
         # 1. Initialize the LlamaModel with bias
-        vllm_config.model_config.hf_config.bias = True
-        vllm_config.model_config.hf_config.mlp_bias = True
+        hf_config.bias = True
+        hf_config.mlp_bias = True
+
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         # 2. Remove the bias from the qkv_proj and gate_up_proj based on config
         # Telechat2's gate_up_proj and qkv_proj don't have bias
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 40a6a9118e53e..4ce56cb3a6aac 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -34,8 +34,8 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DeepseekVLV2Config,
                                              KimiVLConfig, MedusaConfig,
                                              MllamaConfig, MLPSpeculatorConfig,
                                              Nemotron_Nano_VL_Config,
-                                             NemotronConfig, RWConfig,
-                                             UltravoxConfig)
+                                             NemotronConfig, NVLM_D_Config,
+                                             RWConfig, UltravoxConfig)
 # yapf: enable
 from vllm.transformers_utils.configs.mistral import adapt_config_dict
 from vllm.transformers_utils.utils import check_gguf_file
@@ -81,6 +81,7 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
     "medusa": MedusaConfig,
     "eagle": EAGLEConfig,
     "nemotron": NemotronConfig,
+    "NVLM_D": NVLM_D_Config,
     "ultravox": UltravoxConfig,
     **_CONFIG_REGISTRY_OVERRIDE_HF
 }
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 0fcb2beb8c7db..7c7d859e4a325 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -23,6 +23,7 @@ from vllm.transformers_utils.configs.moonvit import MoonViTConfig
 from vllm.transformers_utils.configs.nemotron import NemotronConfig
 from vllm.transformers_utils.configs.nemotron_h import NemotronHConfig
 from vllm.transformers_utils.configs.nemotron_vl import Nemotron_Nano_VL_Config
+from vllm.transformers_utils.configs.nvlm_d import NVLM_D_Config
 from vllm.transformers_utils.configs.ultravox import UltravoxConfig
 
 __all__ = [
@@ -39,5 +40,6 @@ __all__ = [
     "NemotronConfig",
     "NemotronHConfig",
     "Nemotron_Nano_VL_Config",
+    "NVLM_D_Config",
     "UltravoxConfig",
 ]
diff --git a/vllm/transformers_utils/configs/nvlm_d.py b/vllm/transformers_utils/configs/nvlm_d.py
new file mode 100644
index 0000000000000..edfc506882ff5
--- /dev/null
+++ b/vllm/transformers_utils/configs/nvlm_d.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# Adapted from
+# https://huggingface.co/nvidia/NVLM-D-72B/blob/main/configuration_nvlm_d.py
+# --------------------------------------------------------
+# NVLM-D
+# Copyright (c) 2024 NVIDIA
+# Licensed under Apache 2.0 License [see LICENSE for details]
+# --------------------------------------------------------
+from transformers import Qwen2Config
+from transformers.configuration_utils import PretrainedConfig
+
+
+class NVLM_D_Config(PretrainedConfig):
+    model_type = 'NVLM_D'
+    is_composition = True
+
+    def __init__(self, vision_config=None, llm_config=None, **kwargs):
+        super().__init__(**kwargs)
+
+        # Handle vision_config initialization
+        if vision_config is None:
+            vision_config = {}
+
+        # Handle llm_config initialization
+        if llm_config is None:
+            llm_config = {}
+
+        self.vision_config = PretrainedConfig(**vision_config)
+        self.text_config = Qwen2Config(**llm_config)

From 4904e53c3277e92c881bf2a1442805bdc3da983f Mon Sep 17 00:00:00 2001
From: Chenguang Zheng <645327136@qq.com>
Date: Thu, 31 Jul 2025 00:18:37 +0800
Subject: [PATCH 071/224] [Bugfix] SharedStorage Connector for V1 PD multimodal
 (#21611)

Signed-off-by: fake0fan <645327136@qq.com>
Signed-off-by: herotai214 <herotai214@gmail.com>
Co-authored-by: herotai214 <herotai214@gmail.com>
---
 .../unit/test_shared_storage_connector.py     | 215 ++++++++++++++++++
 .../v1/shared_storage_connector.py            |  41 +++-
 2 files changed, 244 insertions(+), 12 deletions(-)
 create mode 100644 tests/v1/kv_connector/unit/test_shared_storage_connector.py

diff --git a/tests/v1/kv_connector/unit/test_shared_storage_connector.py b/tests/v1/kv_connector/unit/test_shared_storage_connector.py
new file mode 100644
index 0000000000000..ee3e71d3b8452
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_shared_storage_connector.py
@@ -0,0 +1,215 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import asdict
+from typing import NamedTuple
+
+from PIL import Image
+
+from vllm import LLM, EngineArgs, SamplingParams
+from vllm.assets.image import ImageAsset
+from vllm.config import KVTransferConfig
+from vllm.multimodal.utils import encode_image_base64
+
+MODEL_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
+
+SAMPLING_PARAMS = SamplingParams(temperature=0.0, top_k=1, max_tokens=128)
+
+TEXT_PROMPTS = [
+    "What's in the image(s)? Around 30 words. What's special in 2nd image?",
+    "The future of AI is",
+]
+
+
+class InputCase(NamedTuple):
+    text: str
+    img: list[Image]
+    expected_len: int
+    info: str
+
+
+def _check_path_len(path):
+    """Return the current number of entries in path"""
+    return len(list(path.iterdir()))
+
+
+def _list_path(path):
+    """Return the list of folder names (generated hashes) under the path"""
+    return list(path.iterdir())
+
+
+def run_test(tmp_path, processor, llm: LLM, question: str,
+             image_urls: list[Image], expected_len: int, info: str):
+    """
+    One individual test to process the prompt and output based on 1 set of input
+    Then check if the length in the storage path matches the expected length
+    `info` introduces details or purpose of the individual test
+    """
+    print(f"***info: {info}***")
+    print(
+        f"**Expected storage path length after llm generate: {expected_len}**")
+    process_prompt(processor, llm, question, image_urls)
+
+    print(f"Path matched expected length: {_check_path_len(tmp_path)}")
+    print(f"Hashes under the storage path: {_list_path(tmp_path)}")
+
+    assert _check_path_len(tmp_path) == expected_len, (
+        f"Expect storage path length {expected_len} ;",
+        f"but end up {_check_path_len(tmp_path)} instead. ", f"Info: {info}")
+
+
+def process_prompt(processor, llm: LLM, question: str,
+                   image_urls: list[Image]):
+    """
+    Form the prompt based on the text and image input, then llm generate output
+    """
+    placeholders = [{
+        "type": "image_url",
+        "image_url": {
+            "url": f"data:image;base64,{encode_image_base64(image_pil)}"
+        }
+    } for image_pil in image_urls]
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant."
+        },
+        {
+            "role": "user",
+            "content": [
+                *placeholders,
+                {
+                    "type": "text",
+                    "text": question
+                },
+            ],
+        },
+    ]
+
+    prompt = processor.apply_chat_template(messages,
+                                           tokenize=False,
+                                           add_generation_prompt=True)
+
+    outputs = llm.generate(
+        {
+            "prompt":
+            prompt,
+            **({
+                "multi_modal_data": {
+                    "image": [*image_urls]
+                }
+            } if image_urls else {})
+        },
+        sampling_params=SAMPLING_PARAMS,
+    )
+
+    print("-" * 50)
+    print("Output:")
+    for o in outputs:
+        generated_text = o.outputs[0].text
+        print(generated_text)
+        print("-" * 50)
+
+
+def test_shared_storage_connector_hashes(tmp_path):
+    """
+    Tests that SharedStorageConnector saves KV to the storage locations
+    with proper hashes; that are unique for inputs with identical text but 
+    different images (same size), or same multiple images but different orders.
+    """
+    # Using tmp_path as the storage path to store KV
+    print(f"KV storage path at: {str(tmp_path)}")
+
+    # Configure the SharedStorageConnector
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="SharedStorageConnector",
+        kv_role="kv_both",
+        kv_connector_extra_config={"shared_storage_path": str(tmp_path)})
+
+    engine_args = EngineArgs(
+        model=MODEL_NAME,
+        max_model_len=8192,
+        max_num_seqs=1,
+        kv_transfer_config=kv_transfer_config,
+        limit_mm_per_prompt={"image": 2},
+    )
+
+    # don't put this import at the top level
+    # it will call torch.cuda.device_count()
+    from transformers import AutoProcessor  # noqa: F401
+
+    # Create processor to handle the chat prompt
+    processor = AutoProcessor.from_pretrained(MODEL_NAME)
+
+    # Prepare images for the tests
+    # Resize to the same size to check hashes correctness
+    image_1 = ImageAsset("stop_sign").pil_image.resize((1280, 720))
+    image_2 = ImageAsset("cherry_blossom").pil_image.resize((1280, 720))
+
+    # Make sure that they are not the same picture
+    assert image_1 != image_2, "The images should not be identical"
+
+    # Create the LLM instance
+    engine_args = asdict(engine_args)
+    llm = LLM(**engine_args)
+
+    # Prepare the input cases
+    input_cases = [
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[image_1],
+                  expected_len=1,
+                  info="image_1 single input the first time."),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[image_2],
+                  expected_len=2,
+                  info=("image_2 single input the first time. "
+                        "It is in same pixel size with image_1, yet it "
+                        "should be able to form a new unique hash.")),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[image_1],
+                  expected_len=2,
+                  info=("image_1 single input the 2nd time. "
+                        "It should not form aother new hash.")),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[image_2],
+                  expected_len=2,
+                  info=("image_2 single input the 2nd time. "
+                        "It should not form aother new hash.")),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[image_1, image_2],
+                  expected_len=3,
+                  info="image_1 with image_2 input the first time."),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[image_2, image_1],
+                  expected_len=4,
+                  info="The image order is swapped. Should form new hash."),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[image_1, image_2],
+                  expected_len=4,
+                  info=("[image_1, image_2] input the 2nd time. "
+                        "It should not form aother new hash.")),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[image_2, image_1],
+                  expected_len=4,
+                  info=("[image_2, image_1] input the 2nd time. "
+                        "It should not form aother new hash.")),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[],
+                  expected_len=5,
+                  info="Pure text input test as a case-control"),
+        InputCase(text=TEXT_PROMPTS[0],
+                  img=[],
+                  expected_len=5,
+                  info="Identical pure text input as a case-control"),
+        InputCase(text=TEXT_PROMPTS[1],
+                  img=[],
+                  expected_len=6,
+                  info="Another pure text input as a case-control"),
+    ]
+
+    # Run tests
+    for case_id, (text, img, expected_len, info) in enumerate(input_cases):
+        print("\n", "=" * 25, f"Below running input case: {case_id}", "=" * 25)
+        run_test(tmp_path, processor, llm, text, img, expected_len, info)
+
+    print("All tests passed successfully!")
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py
index 048748e6b8ecb..fd79387269d56 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py
@@ -32,10 +32,11 @@ class ReqMeta:
     slot_mapping: torch.Tensor
     # Is store or load
     is_store: bool
+    mm_hashes: list[str]
 
     @staticmethod
     def make_meta(token_ids: list[int], block_ids: list[int], block_size: int,
-                  is_store: bool) -> "ReqMeta":
+                  is_store: bool, mm_hashes: list[str]) -> "ReqMeta":
         valid_num_tokens = align_to_block_size(len(token_ids), block_size)
         token_ids_tensor = torch.tensor(token_ids)[:valid_num_tokens]
         block_ids_tensor = torch.tensor(block_ids)
@@ -48,6 +49,7 @@ class ReqMeta:
             token_ids=token_ids_tensor,
             slot_mapping=slot_mapping,
             is_store=is_store,
+            mm_hashes=mm_hashes,
         )
 
 
@@ -64,9 +66,11 @@ class SharedStorageConnectorMetadata(KVConnectorMetadata):
         block_ids: list[int],
         block_size: int,
         is_store: bool,
+        mm_hashes: list[str],
     ) -> None:
         self.requests.append(
-            ReqMeta.make_meta(token_ids, block_ids, block_size, is_store))
+            ReqMeta.make_meta(token_ids, block_ids, block_size, is_store,
+                              mm_hashes))
 
 
 class SharedStorageConnector(KVConnectorBase_V1):
@@ -169,7 +173,7 @@ class SharedStorageConnector(KVConnectorBase_V1):
                         forward_context.virtual_engine]
 
                 filename = self._generate_filename_debug(
-                    layer_name, request.token_ids)
+                    layer_name, request.token_ids, request.mm_hashes)
                 kv_cache = safetensors.torch.load_file(
                     filename)["kv_cache"].cuda()
                 inject_kv_into_layer(kv_cache_layer, kv_cache,
@@ -221,7 +225,7 @@ class SharedStorageConnector(KVConnectorBase_V1):
         for request in connector_metadata.requests:
             if request.is_store:
                 filename = self._generate_filename_debug(
-                    layer_name, request.token_ids)
+                    layer_name, request.token_ids, request.mm_hashes)
                 kv_cache = extract_kv_from_layer(kv_layer,
                                                  request.slot_mapping)
                 tensors = {"kv_cache": kv_cache.detach().cpu()}
@@ -299,7 +303,8 @@ class SharedStorageConnector(KVConnectorBase_V1):
                 meta.add_request(token_ids=new_req.prompt_token_ids,
                                  block_ids=new_req.block_ids[0],
                                  block_size=self._block_size,
-                                 is_store=False)
+                                 is_store=False,
+                                 mm_hashes=new_req.mm_hashes)
                 total_need_load += 1
             else:
                 # NOTE: here, we set the store and load being exclusive,
@@ -310,7 +315,8 @@ class SharedStorageConnector(KVConnectorBase_V1):
                     meta.add_request(token_ids=new_req.prompt_token_ids,
                                      block_ids=new_req.block_ids[0],
                                      block_size=self._block_size,
-                                     is_store=True)
+                                     is_store=True,
+                                     mm_hashes=new_req.mm_hashes)
 
         cached_reqs = scheduler_output.scheduled_cached_reqs
         for i, req_id in enumerate(cached_reqs.req_ids):
@@ -338,7 +344,8 @@ class SharedStorageConnector(KVConnectorBase_V1):
                 meta.add_request(token_ids=token_ids,
                                  block_ids=block_ids,
                                  block_size=self._block_size,
-                                 is_store=False)
+                                 is_store=False,
+                                 mm_hashes=request.mm_hashes)
                 total_need_load += 1
 
         assert total_need_load == len(self._requests_need_load)
@@ -359,20 +366,28 @@ class SharedStorageConnector(KVConnectorBase_V1):
             len(request.prompt_token_ids) - 1, self._block_size)
         foldername = self._generate_foldername_debug(torch.tensor(
             request.prompt_token_ids)[:num_tokens_to_check],
+                                                     request.mm_hashes,
                                                      create_folder=False)
         return os.path.exists(foldername)
 
     def _generate_foldername_debug(
         self,
-        input_ids: torch.Tensor,
+        token_ids: torch.Tensor,
+        mm_hashes: list[str],
         create_folder=False,
     ) -> str:
         """Generate a folder name based on the hash of the bytes of the input 
         ids.
         """
-        input_ids_bytes = input_ids.numpy().tobytes()
-        input_ids_hash = hashlib.md5(input_ids_bytes,
+        token_bytes = token_ids.numpy().tobytes()
+        # Add mm_hashes to the bytes being hashed to avoid path traversal and
+        # to create a canonical key.
+        if mm_hashes:
+            mm_str = "-".join(mm_hashes)
+            token_bytes += mm_str.encode('utf-8')
+        input_ids_hash = hashlib.md5(token_bytes,
                                      usedforsecurity=False).hexdigest()
+
         foldername = os.path.join(self._storage_path, input_ids_hash)
         if create_folder:
             os.makedirs(foldername, exist_ok=True)
@@ -381,12 +396,14 @@ class SharedStorageConnector(KVConnectorBase_V1):
     def _generate_filename_debug(
         self,
         layer_name: str,
-        input_ids: torch.Tensor,
+        token_ids: torch.Tensor,
+        mm_hashes: list[str],
     ) -> str:
         """Generate a file name based on the layer name and the hash 
         of the bytes of the input ids.
         """
-        foldername = self._generate_foldername_debug(input_ids,
+        foldername = self._generate_foldername_debug(token_ids,
+                                                     mm_hashes=mm_hashes,
                                                      create_folder=True)
         return os.path.join(foldername, f"{layer_name}.safetensors")
 

From f4135232b9a8c4845f8961fb1cd17581c56ae2ce Mon Sep 17 00:00:00 2001
From: wxsm <wxsms@foxmail.com>
Date: Thu, 31 Jul 2025 00:41:51 +0800
Subject: [PATCH 072/224] feat(distributed): add `get_required_kvcache_layout`
 class method to kv connector api (#20433)

Signed-off-by: wxsm <wxsms@foxmail.com>
---
 tests/distributed/test_kvlayout.py            | 72 +++++++++++++++++++
 .../kv_transfer/kv_connector/base.py          | 16 ++++-
 .../kv_transfer/kv_connector/factory.py       | 37 +++++-----
 .../kv_transfer/kv_connector/utils.py         | 19 ++---
 .../kv_transfer/kv_connector/v1/base.py       | 14 ++++
 .../kv_connector/v1/multi_connector.py        | 33 +++++++++
 .../kv_connector/v1/nixl_connector.py         | 23 +++++-
 7 files changed, 186 insertions(+), 28 deletions(-)
 create mode 100644 tests/distributed/test_kvlayout.py

diff --git a/tests/distributed/test_kvlayout.py b/tests/distributed/test_kvlayout.py
new file mode 100644
index 0000000000000..d447876f6cc7c
--- /dev/null
+++ b/tests/distributed/test_kvlayout.py
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.config import (DeviceConfig, KVTransferConfig, ModelConfig,
+                         VllmConfig, set_current_vllm_config)
+from vllm.distributed.kv_transfer.kv_connector.utils import (
+    get_kv_connector_cache_layout)
+from vllm.logger import init_logger
+
+logger = init_logger("test_expert_parallel")
+
+
+def test_get_kv_connector_cache_layout_without_kv_connector():
+    vllm_config = VllmConfig(device_config=DeviceConfig("cpu"))
+    with set_current_vllm_config(vllm_config):
+        # Test with default settings
+        layout = get_kv_connector_cache_layout()
+        assert layout == "NHD"
+
+
+def test_get_kv_connector_cache_layout_with_lmcache_connector():
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="LMCacheConnectorV1",
+        kv_role="kv_both",
+    )
+    vllm_config = VllmConfig(device_config=DeviceConfig("cpu"),
+                             kv_transfer_config=kv_transfer_config)
+    with set_current_vllm_config(vllm_config):
+        # Test with default settings
+        layout = get_kv_connector_cache_layout()
+        assert layout == "NHD"
+
+
+def test_get_kv_connector_cache_layout_with_nixl_connector():
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="NixlConnector",
+        kv_role="kv_both",
+    )
+    model_config = ModelConfig()
+    vllm_config = VllmConfig(device_config=DeviceConfig("cpu"),
+                             model_config=model_config,
+                             kv_transfer_config=kv_transfer_config)
+    with set_current_vllm_config(vllm_config):
+        # Test with default settings
+        layout = get_kv_connector_cache_layout()
+        assert layout == "HND"
+
+
+def test_get_kv_connector_cache_layout_with_multi_connector():
+    kv_transfer_config = KVTransferConfig(kv_connector="MultiConnector",
+                                          kv_role="kv_both",
+                                          kv_connector_extra_config={
+                                              "connectors": [{
+                                                  "kv_connector":
+                                                  "SharedStorageConnector",
+                                                  "kv_role":
+                                                  "kv_both"
+                                              }, {
+                                                  "kv_connector":
+                                                  "NixlConnector",
+                                                  "kv_role":
+                                                  "kv_both"
+                                              }]
+                                          })
+    model_config = ModelConfig()
+    vllm_config = VllmConfig(device_config=DeviceConfig("cpu"),
+                             model_config=model_config,
+                             kv_transfer_config=kv_transfer_config)
+    with set_current_vllm_config(vllm_config):
+        # Test with default settings
+        layout = get_kv_connector_cache_layout()
+        assert layout == "HND"
diff --git a/vllm/distributed/kv_transfer/kv_connector/base.py b/vllm/distributed/kv_transfer/kv_connector/base.py
index 181c33925da76..868b227fc8994 100644
--- a/vllm/distributed/kv_transfer/kv_connector/base.py
+++ b/vllm/distributed/kv_transfer/kv_connector/base.py
@@ -9,7 +9,7 @@ The class provides two primary abstract methods:
 """
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 import torch
 
@@ -124,5 +124,19 @@ class KVConnectorBase(ABC):
 
         raise NotImplementedError
 
+    @classmethod
+    def get_required_kvcache_layout(
+            cls, vllm_config: "VllmConfig") -> Optional[str]:
+        """
+        Get the required KV cache layout for this connector.
+        Args:
+            vllm_config (VllmConfig): the vllm config.
+
+        Returns:
+            str: the required KV cache layout. e.g. HND, or NHD.
+            None if the connector does not require a specific layout.
+        """
+        return None
+
 
 KVConnectorBaseType = Union[KVConnectorBase, KVConnectorBase_V1]
diff --git a/vllm/distributed/kv_transfer/kv_connector/factory.py b/vllm/distributed/kv_transfer/kv_connector/factory.py
index be9ce72dea67a..cf7cde2c43771 100644
--- a/vllm/distributed/kv_transfer/kv_connector/factory.py
+++ b/vllm/distributed/kv_transfer/kv_connector/factory.py
@@ -5,6 +5,7 @@ import importlib
 from typing import TYPE_CHECKING, Callable
 
 import vllm.envs as envs
+from vllm.config import KVTransferConfig
 from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBaseType
 from vllm.distributed.kv_transfer.kv_connector.v1 import (KVConnectorBase_V1,
                                                           KVConnectorRole)
@@ -41,14 +42,27 @@ class KVConnectorFactory:
             raise ValueError("Attempting to initialize a V0 Connector, "
                              f"but found {envs.VLLM_USE_V1=}")
 
-        connector_name = config.kv_transfer_config.kv_connector
-        if connector_name not in cls._registry:
-            raise ValueError(f"Unsupported connector type: {connector_name}")
-
-        connector_cls = cls._registry[connector_name]()
+        connector_cls = cls.get_connector_class(config.kv_transfer_config)
         assert issubclass(connector_cls, KVConnectorBase)
         return connector_cls(rank, local_rank, config)
 
+    @classmethod
+    def get_connector_class(
+            cls, kv_transfer_config: "KVTransferConfig"
+    ) -> type[KVConnectorBaseType]:
+        """Get the connector class by name."""
+        connector_name = kv_transfer_config.kv_connector
+        if connector_name in cls._registry:
+            connector_cls = cls._registry[connector_name]()
+        else:
+            connector_module_path = kv_transfer_config.kv_connector_module_path
+            if connector_module_path is None:
+                raise ValueError(
+                    f"Unsupported connector type: {connector_name}")
+            connector_module = importlib.import_module(connector_module_path)
+            connector_cls = getattr(connector_module, connector_name)
+        return connector_cls
+
     @classmethod
     def create_connector_v1(
         cls,
@@ -60,19 +74,10 @@ class KVConnectorFactory:
                              f"but found {envs.VLLM_USE_V1=}")
 
         kv_transfer_config = config.kv_transfer_config
-        connector_name = kv_transfer_config.kv_connector
-        if connector_name in cls._registry:
-            connector_cls = cls._registry[connector_name]()
-        else:
-            connector_module_path = kv_transfer_config.kv_connector_module_path
-            if connector_module_path is None:
-                raise ValueError(
-                    f"Unsupported connector type: {connector_name}")
-            connector_module = importlib.import_module(connector_module_path)
-            connector_cls = getattr(connector_module, connector_name)
+        connector_cls = cls.get_connector_class(kv_transfer_config)
         assert issubclass(connector_cls, KVConnectorBase_V1)
         logger.info("Creating v1 connector with name: %s and engine_id: %s",
-                    connector_name, kv_transfer_config.engine_id)
+                    connector_cls.__name__, kv_transfer_config.engine_id)
         # NOTE(Kuntai): v1 connector is explicitly separated into two roles.
         # Scheduler connector:
         # - Co-locate with scheduler process
diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
index 459a532989140..559c233947ce8 100644
--- a/vllm/distributed/kv_transfer/kv_connector/utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -13,6 +13,8 @@ import torch
 import vllm.envs as envs
 from vllm import _custom_ops as ops
 from vllm.config import VllmConfig, get_current_vllm_config
+from vllm.distributed.kv_transfer.kv_connector.factory import (
+    KVConnectorFactory)
 from vllm.logger import init_logger
 from vllm.v1.outputs import ModelRunnerOutput
 
@@ -103,15 +105,14 @@ def get_kv_connector_cache_layout():
     # used for faster transfer.
     vllm_config = get_current_vllm_config()
     kv_config = vllm_config.kv_transfer_config
-    if kv_config is not None and vllm_config.model_config is None:
-        logger.warning_once("Unable to detect current VLLM config. " \
-        "Defaulting to NHD kv cache layout.")
-    elif kv_config is not None:
-        use_mla = vllm_config.model_config.use_mla
-        if not use_mla and kv_config.kv_connector == "NixlConnector":
-            logger.info_once("NixlConnector detected. Setting KV cache " \
-            "layout to HND for better xfer performance.")
-            return "HND"
+    if kv_config is not None:
+        connector_cls = KVConnectorFactory.get_connector_class(kv_config)
+        required_kvcache_layout = connector_cls.get_required_kvcache_layout(
+            vllm_config)
+        if required_kvcache_layout is not None:
+            return required_kvcache_layout
+        logger.info_once("Connectors do not specify a " \
+                         "kv cache layout, defaulting to NHD.")
     return "NHD"
 
 
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/base.py b/vllm/distributed/kv_transfer/kv_connector/v1/base.py
index 8bbdd7e0621c6..7a2ccb58656fd 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/base.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/base.py
@@ -299,3 +299,17 @@ class KVConnectorBase_V1(ABC):
             returned by the engine.
         """
         return False, None
+
+    @classmethod
+    def get_required_kvcache_layout(
+            cls, vllm_config: "VllmConfig") -> Optional[str]:
+        """
+        Get the required KV cache layout for this connector.
+        Args:
+            vllm_config (VllmConfig): the vllm config.
+
+        Returns:
+            str: the required KV cache layout. e.g. HND, or NHD.
+            None if the connector does not require a specific layout.
+        """
+        return None
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
index a2eaa0040191e..934a03a12ee5e 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
@@ -202,3 +202,36 @@ class MultiConnector(KVConnectorBase_V1):
         self._requests_to_connector.pop(request.request_id, None)
 
         return async_saves > 0, kv_txfer_params
+
+    @classmethod
+    def get_required_kvcache_layout(
+            cls, vllm_config: "VllmConfig") -> Optional[str]:
+        """
+        Get the required KV cache layout for this connector.
+        Args:
+            vllm_config (VllmConfig): the vllm config.
+
+        Returns:
+            str: the required KV cache layout. e.g. HND, or NHD.
+            None if the connector does not require a specific layout.
+        """
+        ktcs = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
+            "connectors")
+        assert ktcs is not None
+        layouts: set[str] = set()
+        temp_vllm_config = copy.copy(vllm_config)
+        for ktc in ktcs:
+            kv_transfer_config = KVTransferConfig(**ktc)
+            temp_vllm_config.kv_transfer_config = kv_transfer_config
+            required_kvcache_layout = KVConnectorFactory.get_connector_class(
+                kv_transfer_config).get_required_kvcache_layout(
+                    temp_vllm_config)
+            if required_kvcache_layout is not None:
+                layouts.add(required_kvcache_layout)
+
+        if len(layouts) > 1:
+            raise ValueError(f"KV cache layout mismatch: "
+                             f"found {len(layouts)} different layouts "
+                             f"({', '.join(layouts) })."
+                             f"All connectors must use the same layout.")
+        return next(iter(layouts), None)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
index 6d86ab7f7a4c2..e7fc2b118145c 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -133,6 +133,25 @@ class NixlConnector(KVConnectorBase_V1):
             self.connector_worker = NixlConnectorWorker(
                 vllm_config, self.engine_id)
 
+    ############################################################
+    # Class Methods
+    ############################################################
+    @classmethod
+    def get_required_kvcache_layout(cls, vllm_config: VllmConfig):
+        if vllm_config.model_config is None:
+            logger.warning_once("Unable to detect current VLLM config. "
+                                "Fallback to default kv cache layout.")
+            return None
+        use_mla = vllm_config.model_config.use_mla
+        if use_mla:
+            # Return None for MLA models: the layout does not
+            # matter in that case, so we fall back to the
+            # default kv cache layout.
+            return None
+        logger.info_once("NixlConnector setting KV cache "
+                         "layout to HND for better xfer performance.")
+        return "HND"
+
     ############################################################
     # Scheduler Side Methods
     ############################################################
@@ -236,13 +255,13 @@ class NixlConnectorScheduler:
         """
         For remote prefill, pull all prompt blocks from remote
         asynchronously relative to engine execution.
-        
+
         Args:
             request (Request): the request object.
             num_computed_tokens (int): the number of locally
                 computed tokens for this request
         Returns:
-            * the number of tokens that can be loaded from the 
+            * the number of tokens that can be loaded from the
               external KV cache beyond what is already computed.
             * true if the external KV cache tokens will be loaded
               asynchronously (between scheduler steps).

From 8f0d5167155247934d247eb10ae086108db8d473 Mon Sep 17 00:00:00 2001
From: wenxindongwork <161090399+wenxindongwork@users.noreply.github.com>
Date: Wed, 30 Jul 2025 10:02:12 -0700
Subject: [PATCH 073/224] [TPU] Support Pathways in vLLM (#21417)

Signed-off-by: wenxindongwork <wenxindong@google.com>
---
 vllm/envs.py               |  5 +++++
 vllm/platforms/__init__.py | 18 ++++++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index ec4b0888d0f40..19bc9156b2586 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -124,6 +124,7 @@ if TYPE_CHECKING:
     VLLM_V1_USE_OUTLINES_CACHE: bool = False
     VLLM_TPU_BUCKET_PADDING_GAP: int = 0
     VLLM_TPU_MOST_MODEL_LEN: Optional[int] = None
+    VLLM_TPU_USING_PATHWAYS: bool = False
     VLLM_USE_DEEP_GEMM: bool = False
     VLLM_USE_FLASHINFER_MOE_FP8: bool = False
     VLLM_USE_FLASHINFER_MOE_FP4: bool = False
@@ -900,6 +901,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_TPU_MOST_MODEL_LEN":
     lambda: maybe_convert_int(os.environ.get("VLLM_TPU_MOST_MODEL_LEN", None)),
 
+    # Whether Pathways is in use, i.e. JAX_PLATFORMS contains "proxy"
+    "VLLM_TPU_USING_PATHWAYS":
+    lambda: bool("proxy" in os.getenv("JAX_PLATFORMS", "").lower()),
+
     # Allow use of DeepGemm kernels for fused moe ops.
     "VLLM_USE_DEEP_GEMM":
     lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "0"))),
diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index c13659f8a06e6..56edb8629e45b 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -1,11 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
 import logging
 import traceback
 from itertools import chain
 from typing import TYPE_CHECKING, Optional
 
+from vllm import envs
 from vllm.plugins import load_plugins_by_group
 from vllm.utils import resolve_obj_by_qualname, supports_xccl
 
@@ -31,20 +31,26 @@ def vllm_version_matches_substr(substr: str) -> bool:
 
 
 def tpu_platform_plugin() -> Optional[str]:
-    is_tpu = False
     logger.debug("Checking if TPU platform is available.")
+
+    # Check for Pathways TPU proxy
+    if envs.VLLM_TPU_USING_PATHWAYS:
+        logger.debug("Confirmed TPU platform is available via Pathways proxy.")
+        return "tpu_commons.platforms.tpu_jax.TpuPlatform"
+
+    # Check for libtpu installation
     try:
         # While it's technically possible to install libtpu on a
         # non-TPU machine, this is a very uncommon scenario. Therefore,
-        # we assume that libtpu is installed if and only if the machine
+        # we assume that libtpu is installed only if the machine
         # has TPUs.
+
         import libtpu  # noqa: F401
-        is_tpu = True
         logger.debug("Confirmed TPU platform is available.")
+        return "vllm.platforms.tpu.TpuPlatform"
     except Exception as e:
         logger.debug("TPU platform is not available because: %s", str(e))
-
-    return "vllm.platforms.tpu.TpuPlatform" if is_tpu else None
+        return None
 
 
 def cuda_platform_plugin() -> Optional[str]:

From 56bd537dde023f2d8372257255af45fa784ee739 Mon Sep 17 00:00:00 2001
From: Nick Hill <nhill@redhat.com>
Date: Wed, 30 Jul 2025 18:20:20 +0100
Subject: [PATCH 074/224] [Misc] Support more collective_rpc return types
 (#21845)

Signed-off-by: Nick Hill <nhill@redhat.com>
---
 tests/v1/engine/test_engine_core_client.py | 65 +++++++++++++++++++++-
 vllm/v1/engine/__init__.py                 |  9 ++-
 vllm/v1/engine/core.py                     |  6 +-
 vllm/v1/engine/core_client.py              |  3 +-
 vllm/v1/serial_utils.py                    | 44 +++++++++++++++
 5 files changed, 121 insertions(+), 6 deletions(-)

diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py
index 2ac6dc796bd10..f648c38a63f79 100644
--- a/tests/v1/engine/test_engine_core_client.py
+++ b/tests/v1/engine/test_engine_core_client.py
@@ -6,8 +6,9 @@ import os
 import signal
 import time
 import uuid
+from dataclasses import dataclass
 from threading import Thread
-from typing import Optional
+from typing import Optional, Union
 from unittest.mock import MagicMock
 
 import pytest
@@ -292,6 +293,68 @@ async def test_engine_core_client_asyncio(monkeypatch: pytest.MonkeyPatch):
             client.shutdown()
 
 
+@dataclass
+class MyDataclass:
+    message: str
+
+
+# Dummy utility function to monkey-patch into engine core.
+def echo_dc(
+    self,
+    msg: str,
+    return_list: bool = False,
+) -> Union[MyDataclass, list[MyDataclass]]:
+    print(f"echo dc util function called: {msg}")
+    # Return dataclass to verify support for returning custom types
+    # (for which there is special handling to make it work with msgspec).
+    return [MyDataclass(msg) for _ in range(3)] if return_list \
+        else MyDataclass(msg)
+
+
+@pytest.mark.asyncio(loop_scope="function")
+async def test_engine_core_client_util_method_custom_return(
+        monkeypatch: pytest.MonkeyPatch):
+
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_V1", "1")
+
+        # Must set insecure serialization to allow returning custom types.
+        m.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+
+        # Monkey-patch core engine utility function to test.
+        m.setattr(EngineCore, "echo_dc", echo_dc, raising=False)
+
+        engine_args = EngineArgs(model=MODEL_NAME, enforce_eager=True)
+        vllm_config = engine_args.create_engine_config(
+            usage_context=UsageContext.UNKNOWN_CONTEXT)
+        executor_class = Executor.get_class(vllm_config)
+
+        with set_default_torch_num_threads(1):
+            client = EngineCoreClient.make_client(
+                multiprocess_mode=True,
+                asyncio_mode=True,
+                vllm_config=vllm_config,
+                executor_class=executor_class,
+                log_stats=True,
+            )
+
+        try:
+            # Test utility method returning custom / non-native data type.
+            core_client: AsyncMPClient = client
+
+            result = await core_client.call_utility_async(
+                "echo_dc", "testarg2", False)
+            assert isinstance(result,
+                              MyDataclass) and result.message == "testarg2"
+            result = await core_client.call_utility_async(
+                "echo_dc", "testarg2", True)
+            assert isinstance(result, list) and all(
+                isinstance(r, MyDataclass) and r.message == "testarg2"
+                for r in result)
+        finally:
+            client.shutdown()
+
+
 @pytest.mark.parametrize(
     "multiprocessing_mode,publisher_config",
     [(True, "tcp"), (False, "inproc")],
diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py
index 79dc80d8fc547..810d03f32d726 100644
--- a/vllm/v1/engine/__init__.py
+++ b/vllm/v1/engine/__init__.py
@@ -123,6 +123,13 @@ class EngineCoreOutput(
         return self.finish_reason is not None
 
 
+class UtilityResult:
+    """Wrapper for special handling when serializing/deserializing."""
+
+    def __init__(self, r: Any = None):
+        self.result = r
+
+
 class UtilityOutput(
         msgspec.Struct,
         array_like=True,  # type: ignore[call-arg]
@@ -132,7 +139,7 @@ class UtilityOutput(
 
     # Non-None implies the call failed, result should be None.
     failure_message: Optional[str] = None
-    result: Any = None
+    result: Optional[UtilityResult] = None
 
 
 class EngineCoreOutputs(
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 39fda521f36af..9f2fca6961388 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -36,7 +36,7 @@ from vllm.v1.core.sched.scheduler import Scheduler as V1Scheduler
 from vllm.v1.engine import (EngineCoreOutputs, EngineCoreRequest,
                             EngineCoreRequestType,
                             ReconfigureDistributedRequest, ReconfigureRankType,
-                            UtilityOutput)
+                            UtilityOutput, UtilityResult)
 from vllm.v1.engine.mm_input_cache import MirroredProcessingCache
 from vllm.v1.engine.utils import EngineHandshakeMetadata, EngineZmqAddresses
 from vllm.v1.executor.abstract import Executor
@@ -715,8 +715,8 @@ class EngineCoreProc(EngineCore):
             output = UtilityOutput(call_id)
             try:
                 method = getattr(self, method_name)
-                output.result = method(
-                    *self._convert_msgspec_args(method, args))
+                result = method(*self._convert_msgspec_args(method, args))
+                output.result = UtilityResult(result)
             except BaseException as e:
                 logger.exception("Invocation of %s method failed", method_name)
                 output.failure_message = (f"Call to {method_name} method"
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index acff5bf6823d9..fdf5a5de191c0 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -552,7 +552,8 @@ def _process_utility_output(output: UtilityOutput,
     if output.failure_message is not None:
         future.set_exception(Exception(output.failure_message))
     else:
-        future.set_result(output.result)
+        assert output.result is not None
+        future.set_result(output.result.result)
 
 
 class SyncMPClient(MPClient):
diff --git a/vllm/v1/serial_utils.py b/vllm/v1/serial_utils.py
index 03200c2c2f8ec..4b6a983252b0e 100644
--- a/vllm/v1/serial_utils.py
+++ b/vllm/v1/serial_utils.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import dataclasses
+import importlib
 import pickle
 from collections.abc import Sequence
 from inspect import isclass
@@ -9,6 +10,7 @@ from types import FunctionType
 from typing import Any, Optional, Union
 
 import cloudpickle
+import msgspec
 import numpy as np
 import torch
 import zmq
@@ -22,6 +24,7 @@ from vllm.multimodal.inputs import (BaseMultiModalField,
                                     MultiModalFlatField, MultiModalKwargs,
                                     MultiModalKwargsItem,
                                     MultiModalSharedField, NestedTensors)
+from vllm.v1.engine import UtilityResult
 
 logger = init_logger(__name__)
 
@@ -46,6 +49,10 @@ def _log_insecure_serialization_warning():
                         "VLLM_ALLOW_INSECURE_SERIALIZATION=1")
 
 
+def _typestr(t: type):
+    return t.__module__, t.__qualname__
+
+
 class MsgpackEncoder:
     """Encoder with custom torch tensor and numpy array serialization.
 
@@ -122,6 +129,18 @@ class MsgpackEncoder:
                     for itemlist in mm._items_by_modality.values()
                     for item in itemlist]
 
+        if isinstance(obj, UtilityResult):
+            result = obj.result
+            if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION or result is None:
+                return None, result
+            # Since utility results are not strongly typed, we also encode
+            # the type (or a list of types in the case it's a list) to
+            # help with correct msgspec deserialization.
+            cls = result.__class__
+            return _typestr(cls) if cls is not list else [
+                _typestr(type(v)) for v in result
+            ], result
+
         if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION:
             raise TypeError(f"Object of type {type(obj)} is not serializable"
                             "Set VLLM_ALLOW_INSECURE_SERIALIZATION=1 to allow "
@@ -237,8 +256,33 @@ class MsgpackDecoder:
                     k: self._decode_nested_tensors(v)
                     for k, v in obj.items()
                 })
+            if t is UtilityResult:
+                return self._decode_utility_result(obj)
         return obj
 
+    def _decode_utility_result(self, obj: Any) -> UtilityResult:
+        result_type, result = obj
+        if result_type is not None:
+            if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION:
+                raise TypeError("VLLM_ALLOW_INSECURE_SERIALIZATION must "
+                                "be set to use custom utility result types")
+            assert isinstance(result_type, list)
+            if len(result_type) == 2 and isinstance(result_type[0], str):
+                result = self._convert_result(result_type, result)
+            else:
+                assert isinstance(result, list)
+                result = [
+                    self._convert_result(rt, r)
+                    for rt, r in zip(result_type, result)
+                ]
+        return UtilityResult(result)
+
+    def _convert_result(self, result_type: Sequence[str], result: Any):
+        mod_name, name = result_type
+        mod = importlib.import_module(mod_name)
+        result_type = getattr(mod, name)
+        return msgspec.convert(result, result_type, dec_hook=self.dec_hook)
+
     def _decode_ndarray(self, arr: Any) -> np.ndarray:
         dtype, shape, data = arr
         # zero-copy decode. We assume the ndarray will not be kept around,

From b9b753e7a7d95311186bbfc2b30b643a2f9e6ca1 Mon Sep 17 00:00:00 2001
From: Doug Smith <dosmith@redhat.com>
Date: Wed, 30 Jul 2025 16:04:40 -0400
Subject: [PATCH 075/224] For VLLM_USE_PRECOMPILED, only compiled .so files
 should be extracted (#21964)

---
 setup.py | 79 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 44 insertions(+), 35 deletions(-)

diff --git a/setup.py b/setup.py
index 58e5833f16ae1..bf3391e2db19e 100644
--- a/setup.py
+++ b/setup.py
@@ -371,40 +371,31 @@ class repackage_wheel(build_ext):
                 raise SetupError(
                     f"Failed to get vLLM wheel from {wheel_location}") from e
 
-        # During a docker build: determine correct filename, copy wheel.
-        if envs.VLLM_DOCKER_BUILD_CONTEXT:
-            dist_dir = "/workspace/dist"
-            os.makedirs(dist_dir, exist_ok=True)
-            # Determine correct wheel filename from METADATA
-            with zipfile.ZipFile(wheel_path, "r") as z:
-                metadata_file = next(
-                    (n for n in z.namelist()
-                     if n.endswith(".dist-info/METADATA")),
-                    None,
-                )
-                if not metadata_file:
-                    raise RuntimeError(
-                        "Could not find METADATA in precompiled wheel.")
-                metadata = z.read(metadata_file).decode()
-                version_line = next((line for line in metadata.splitlines()
-                                     if line.startswith("Version: ")), None)
-                if not version_line:
-                    raise RuntimeError(
-                        "Could not determine version from METADATA.")
-                version = version_line.split(": ")[1].strip()
+        # Set the dist_dir for Docker build context
+        dist_dir = ("/workspace/dist"
+                    if envs.VLLM_DOCKER_BUILD_CONTEXT else "dist")
+        os.makedirs(dist_dir, exist_ok=True)
 
-            # Build correct filename using internal version
-            arch_tag = "cp38-abi3-manylinux1_x86_64"
-            corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
-            final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
-
-            print(f"Docker build context detected, copying precompiled wheel "
-                  f"({version}) to {final_wheel_path}")
-            shutil.copy2(wheel_path, final_wheel_path)
-            return
-
-        # Unzip the wheel when not in Docker context
+        # Extract only necessary compiled .so files from precompiled wheel
         with zipfile.ZipFile(wheel_path) as wheel:
+            # Get version from METADATA (for logging and the wheel filename)
+            metadata_file = next((n for n in wheel.namelist()
+                                  if n.endswith(".dist-info/METADATA")), None)
+            if not metadata_file:
+                raise RuntimeError(
+                    "Could not find METADATA in precompiled wheel.")
+            metadata = wheel.read(metadata_file).decode()
+            version_line = next((line for line in metadata.splitlines()
+                                 if line.startswith("Version: ")), None)
+            if not version_line:
+                raise RuntimeError(
+                    "Could not determine version from METADATA.")
+            version = version_line.split(": ")[1].strip()
+
+            print(f"Extracting precompiled kernels from vLLM wheel version: "
+                  f"{version}")
+
+            # List of compiled shared objects to extract
             files_to_copy = [
                 "vllm/_C.abi3.so",
                 "vllm/_moe_C.abi3.so",
@@ -413,6 +404,7 @@ class repackage_wheel(build_ext):
                 "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
                 "vllm/cumem_allocator.abi3.so",
             ]
+
             file_members = list(
                 filter(lambda x: x.filename in files_to_copy, wheel.filelist))
             compiled_regex = re.compile(
@@ -430,9 +422,26 @@ class repackage_wheel(build_ext):
                 if package_name not in package_data:
                     package_data[package_name] = []
 
-                wheel.extract(file)
-                if not file_name.endswith(".py"):
-                    package_data[package_name].append(file_name)
+                output_base = (dist_dir
+                               if envs.VLLM_DOCKER_BUILD_CONTEXT else ".")
+                target_path = os.path.join(output_base, file.filename)
+                os.makedirs(os.path.dirname(target_path), exist_ok=True)
+                with wheel.open(file.filename) as src, open(target_path,
+                                                            "wb") as dst:
+                    shutil.copyfileobj(src, dst)
+
+                package_data[package_name].append(file_name)
+
+        # Copy wheel into dist dir for Docker to consume (e.g., via --mount)
+        if envs.VLLM_DOCKER_BUILD_CONTEXT:
+            arch_tag = "cp38-abi3-manylinux1_x86_64"
+            corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
+            final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
+
+            print(
+                "Docker build context detected, copying precompiled wheel to "
+                f"{final_wheel_path}")
+            shutil.copy2(wheel_path, final_wheel_path)
 
 
 def _no_device() -> bool:

From f12d9256b39f058b93c201cedc7ffd9e605e9db8 Mon Sep 17 00:00:00 2001
From: Ming Yang <minos.future@gmail.com>
Date: Wed, 30 Jul 2025 13:15:06 -0700
Subject: [PATCH 076/224] [Misc] Use dracut on CentOS and skip clone if repo
 exists for EP kernel installation (#21635)

Signed-off-by: Ming Yang <minos.future@gmail.com>
---
 tools/ep_kernels/configure_system_drivers.sh | 12 +++++-
 tools/ep_kernels/install_python_libraries.sh | 40 +++++++++++++++++++-
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/tools/ep_kernels/configure_system_drivers.sh b/tools/ep_kernels/configure_system_drivers.sh
index cf15c1daccaec..b8bd8b8f6f550 100644
--- a/tools/ep_kernels/configure_system_drivers.sh
+++ b/tools/ep_kernels/configure_system_drivers.sh
@@ -2,6 +2,16 @@ set -ex
 
 # turn on IBGDA
 echo 'options nvidia NVreg_EnableStreamMemOPs=1 NVreg_RegistryDwords="PeerMappingOverride=1;"' | tee -a /etc/modprobe.d/nvidia.conf
-update-initramfs -u
+
+if command -v update-initramfs &> /dev/null; then
+    # for Debian/Ubuntu
+    sudo update-initramfs -u
+elif command -v dracut &> /dev/null; then
+    # for Fedora/CentOS
+    sudo dracut --force
+else
+    echo "No supported initramfs update tool found."
+    exit 1
+fi
 
 echo "Please reboot the system to apply the changes"
diff --git a/tools/ep_kernels/install_python_libraries.sh b/tools/ep_kernels/install_python_libraries.sh
index 83643c084bf9a..9d1b2da3b4122 100644
--- a/tools/ep_kernels/install_python_libraries.sh
+++ b/tools/ep_kernels/install_python_libraries.sh
@@ -53,9 +53,45 @@ popd
 
 export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem_install:$CMAKE_PREFIX_PATH
 
+is_git_dirty() {
+    local dir=$1
+    pushd "$dir" > /dev/null
+
+    if [ -d ".git" ] && [ -n "$(git status --porcelain 2>/dev/null)" ]; then
+        popd > /dev/null
+        return 0  # dirty (true)
+    else
+        popd > /dev/null
+        return 1  # clean (false)
+    fi
+}
+
+# Function to handle git repository cloning with dirty/incomplete checks
+clone_repo() {
+    local repo_url=$1
+    local dir_name=$2
+    local key_file=$3
+
+    if [ -d "$dir_name" ]; then
+        # Check if directory has uncommitted changes (dirty)
+        if is_git_dirty "$dir_name"; then
+            echo "$dir_name directory is dirty, skipping clone"
+        # Check if clone failed (directory exists but not a valid git repo or missing key files)
+        elif [ ! -d "$dir_name/.git" ] || [ ! -f "$dir_name/$key_file" ]; then
+            echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
+            rm -rf "$dir_name"
+            git clone "$repo_url"
+        else
+            echo "$dir_name directory exists and appears complete; manually update if needed"
+        fi
+    else
+        git clone "$repo_url"
+    fi
+}
+
 # build and install pplx, require pytorch installed
 pushd $WORKSPACE
-git clone https://github.com/ppl-ai/pplx-kernels
+clone_repo "https://github.com/ppl-ai/pplx-kernels" "pplx-kernels" "setup.py"
 cd pplx-kernels
 # see https://github.com/pypa/pip/issues/9955#issuecomment-838065925
 # PIP_NO_BUILD_ISOLATION=0 disables build isolation
@@ -64,7 +100,7 @@ popd
 
 # build and install deepep, require pytorch installed
 pushd $WORKSPACE
-git clone https://github.com/deepseek-ai/DeepEP
+clone_repo "https://github.com/deepseek-ai/DeepEP" "DeepEP" "setup.py"
 cd DeepEP
 export NVSHMEM_DIR=$WORKSPACE/nvshmem_install
 PIP_NO_BUILD_ISOLATION=0 pip install -vvv -e  .

From 287f527f5403bb42a32136cf6c802faeb92a09ef Mon Sep 17 00:00:00 2001
From: cascade <cascade812@outlook.com>
Date: Wed, 30 Jul 2025 14:23:41 -0700
Subject: [PATCH 077/224] [Feature] Add async tensor parallelism for scaled mm
 (#20155)

Signed-off-by: cascade812 <cascade812@outlook.com>
---
 tests/compile/test_async_tp.py           | 143 ++++++++++++-
 vllm/compilation/collective_fusion.py    | 244 ++++++++++++++++++++++-
 vllm/compilation/sequence_parallelism.py |   2 +-
 3 files changed, 381 insertions(+), 8 deletions(-)

diff --git a/tests/compile/test_async_tp.py b/tests/compile/test_async_tp.py
index 916ec2b83df4f..9a51e6b3514f4 100644
--- a/tests/compile/test_async_tp.py
+++ b/tests/compile/test_async_tp.py
@@ -22,6 +22,8 @@ from ..utils import (compare_two_settings, create_new_process_for_each_test,
                      multi_gpu_test)
 from .backend import TestBackend
 
+FP8_DTYPE = current_platform.fp8_dtype()
+
 prompts = [
     "Hello, my name is",
     "The president of the United States is",
@@ -32,9 +34,10 @@ prompts = [
 
 class TestMMRSModel(torch.nn.Module):
 
-    def __init__(self, hidden_size=16):
+    def __init__(self, hidden_size=16, dtype=torch.float16):
         super().__init__()
         self.hidden_size = hidden_size
+        self.dtype = dtype
         self.gate_proj = torch.nn.Parameter(torch.empty(
             (self.hidden_size * 2, hidden_size)),
                                             requires_grad=False)
@@ -64,9 +67,10 @@ class TestMMRSModel(torch.nn.Module):
 
 class TestAGMMModel(torch.nn.Module):
 
-    def __init__(self, hidden_size=16):
+    def __init__(self, hidden_size=16, dtype=torch.float16):
         super().__init__()
         self.hidden_size = hidden_size
+        self.dtype = dtype
         self.weight = torch.nn.Parameter(torch.empty(
             (hidden_size, hidden_size)),
                                          requires_grad=False)
@@ -91,8 +95,125 @@ class TestAGMMModel(torch.nn.Module):
         return [torch.ops.symm_mem.fused_all_gather_matmul.default]
 
 
+class _BaseScaledMMModel(torch.nn.Module):
+
+    def __init__(self, hidden_size=16, dtype=torch.float16):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.dtype = dtype
+        self.weight = torch.empty([hidden_size, hidden_size], dtype=FP8_DTYPE)\
+            .contiguous().transpose(0, 1)
+
+        # Initialize scale_b for _scaled_mm.
+        self.scale_b = torch.ones(1, self.hidden_size, dtype=torch.float32)
+
+
+class TestScaledMMRSModel(_BaseScaledMMModel):
+
+    def forward(self, input: torch.Tensor):
+        """
+        Forward pass implementing the scaled_mm + reduce scatter in the FX graph
+    
+        """
+        fp8_input = input.to(FP8_DTYPE)
+        scale_a = torch.ones(input.shape[0], 1, dtype=torch.float32)
+        scaled_mm = torch._scaled_mm(fp8_input,
+                                     self.weight,
+                                     scale_a=scale_a,
+                                     scale_b=self.scale_b,
+                                     out_dtype=self.dtype)
+        reduce_scatter = tensor_model_parallel_reduce_scatter(scaled_mm, dim=0)
+        return reduce_scatter
+
+    def ops_in_model_before(self):
+        return [torch.ops.vllm.reduce_scatter.default]
+
+    def ops_in_model_after(self):
+        return [torch.ops.symm_mem.fused_scaled_matmul_reduce_scatter.default]
+
+
+class TestAGScaledMMModel(_BaseScaledMMModel):
+
+    def forward(self, input: torch.Tensor):
+        """
+        Forward pass implementing the all gather + scaled_mm in the FX graph
+        """
+        # Cast input to FP8
+        fp8_input = input.to(FP8_DTYPE)
+        all_gather = tensor_model_parallel_all_gather(fp8_input, dim=0)
+
+        scale_a = torch.ones(all_gather.shape[0], 1, dtype=torch.float32)
+        scaled_mm = torch._scaled_mm(all_gather,
+                                     self.weight,
+                                     scale_a=scale_a,
+                                     scale_b=self.scale_b,
+                                     out_dtype=self.dtype)
+        return scaled_mm
+
+    def ops_in_model_before(self):
+        return [torch.ops.vllm.all_gather.default]
+
+    def ops_in_model_after(self):
+        return [torch.ops.symm_mem.fused_all_gather_scaled_matmul.default]
+
+
+class TestCutlassScaledMMRSModel(_BaseScaledMMModel):
+
+    def forward(self, input: torch.Tensor):
+        """
+        Forward pass implementing the cutlass_scaled_mm + reduce scatter
+        in the FX graph
+    
+        """
+        fp8_input = input.to(FP8_DTYPE)
+        scale_a = torch.ones(input.shape[0], 1, dtype=torch.float32)
+        mm_out = torch.empty((fp8_input.shape[0], self.weight.shape[1]),
+                             dtype=self.dtype,
+                             device=input.device)
+        torch.ops._C.cutlass_scaled_mm(mm_out, fp8_input, self.weight, scale_a,
+                                       self.scale_b, None)
+        reduce_scatter = tensor_model_parallel_reduce_scatter(mm_out, dim=0)
+        return reduce_scatter
+
+    def ops_in_model_before(self):
+        return [torch.ops.vllm.reduce_scatter.default]
+
+    def ops_in_model_after(self):
+        return [torch.ops.symm_mem.fused_scaled_matmul_reduce_scatter.default]
+
+
+class TestAGCutlassScaledMMModel(_BaseScaledMMModel):
+
+    def forward(self, input: torch.Tensor):
+        """
+        Forward pass implementing the all gather + cutlass_scaled_mm 
+        in the FX graph
+        """
+        # Cast input to FP8
+        fp8_input = input.to(FP8_DTYPE)
+        all_gather = tensor_model_parallel_all_gather(fp8_input, dim=0)
+
+        scale_a = torch.ones(all_gather.shape[0], 1, dtype=torch.float32)
+
+        mm_out = torch.empty((all_gather.shape[0], self.weight.shape[1]),
+                             dtype=self.dtype,
+                             device=all_gather.device)
+        torch.ops._C.cutlass_scaled_mm(mm_out, all_gather, self.weight,
+                                       scale_a, self.scale_b, None)
+        return mm_out
+
+    def ops_in_model_before(self):
+        return [torch.ops.vllm.all_gather.default]
+
+    def ops_in_model_after(self):
+        return [torch.ops.symm_mem.fused_all_gather_scaled_matmul.default]
+
+
 @multi_gpu_test(num_gpus=2)
-@pytest.mark.parametrize("test_model", [TestMMRSModel, TestAGMMModel])
+@pytest.mark.parametrize("test_model", [
+    TestMMRSModel, TestAGMMModel, TestScaledMMRSModel, TestAGScaledMMModel,
+    TestCutlassScaledMMRSModel, TestAGCutlassScaledMMModel
+])
 @pytest.mark.parametrize("batch_size", [8])
 @pytest.mark.parametrize("seq_len", [16])
 @pytest.mark.parametrize("hidden_size", [16])
@@ -101,6 +222,14 @@ class TestAGMMModel(torch.nn.Module):
                     reason="Only test on CUDA")
 def test_async_tp_pass_replace(test_model: str, batch_size: int, seq_len: int,
                                hidden_size: int, dtype: torch.dtype):
+    if test_model in (TestScaledMMRSModel, TestAGScaledMMModel,
+                      TestCutlassScaledMMRSModel,
+                      TestAGCutlassScaledMMModel) and dtype == torch.float16:
+        pytest.skip(
+            "Only bf16 high precision output types are supported for " \
+            "per-token (row-wise) scaling"
+        )
+
     num_processes = 2
 
     def run_torch_spawn(fn, nprocs):
@@ -155,7 +284,8 @@ def async_tp_pass_on_test_model(local_rank: int, world_size: int,
     async_tp_pass = AsyncTPPass(vllm_config)
     backend = TestBackend(async_tp_pass)
 
-    model = test_model_cls(hidden_size)
+    model = test_model_cls(hidden_size,
+                           dtype)  # Pass dtype to model constructor
 
     hidden_states = torch.randn((batch_size * seq_len, hidden_size),
                                 dtype=dtype,
@@ -174,7 +304,10 @@ def async_tp_pass_on_test_model(local_rank: int, world_size: int,
 
 
 @create_new_process_for_each_test()
-@pytest.mark.parametrize("model_id", ["meta-llama/Llama-3.2-1B-Instruct"])
+@pytest.mark.parametrize("model_id", [
+    "meta-llama/Llama-3.2-1B-Instruct",
+    "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"
+])
 @pytest.mark.parametrize("tp_size", [2])
 @pytest.mark.parametrize("async_tp_enabled", [True])
 @pytest.mark.parametrize("distributed_backend", ["mp"])
diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py
index 0e7961841bd33..cb99fe8310e73 100644
--- a/vllm/compilation/collective_fusion.py
+++ b/vllm/compilation/collective_fusion.py
@@ -15,10 +15,13 @@ from vllm.distributed import get_tp_group, tensor_model_parallel_all_reduce
 from vllm.distributed.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
 from vllm.utils import direct_register_custom_op
 
 from .vllm_inductor_pass import VllmInductorPass
 
+FP8_DTYPE = current_platform.fp8_dtype()
+
 if find_spec("flashinfer"):
     try:
         import flashinfer.comm as flashinfer_comm
@@ -28,7 +31,6 @@ if find_spec("flashinfer"):
         flashinfer_comm = None
 else:
     flashinfer_comm = None
-from vllm.platforms import current_platform
 
 logger = init_logger(__name__)
 
@@ -118,6 +120,230 @@ class AllGatherGEMMPattern(BasePattern):
                                 pm.fwd_only, pm_pass)
 
 
+class ScaledMMReduceScatterPattern(BasePattern):
+
+    def get_inputs(self):
+        input = torch.empty([16, 16], device=self.device, dtype=FP8_DTYPE)
+        mm_weight = torch.empty([16, 16], device=self.device,
+                                dtype=FP8_DTYPE).contiguous().transpose(0, 1)
+        scale_a = torch.empty([16, 1], device=self.device, dtype=torch.float32)
+        scale_b = torch.empty([1, 16], device=self.device, dtype=torch.float32)
+        return [input, mm_weight, scale_a, scale_b]
+
+    def register(self, pm_pass: PatternMatcherPass):
+
+        def pattern(input: torch.Tensor, mat2: torch.Tensor,
+                    scale_a: torch.Tensor,
+                    scale_b: torch.Tensor) -> torch.Tensor:
+            scaled_mm = torch.ops.aten._scaled_mm.default(input,
+                                                          mat2=mat2,
+                                                          scale_a=scale_a,
+                                                          scale_b=scale_b,
+                                                          bias=None,
+                                                          scale_result=None,
+                                                          out_dtype=self.dtype)
+            reduce_scatter = torch.ops.vllm.reduce_scatter.default(
+                scaled_mm,
+                dim=0,
+                world_size=self.tp_size,
+                group_name=self.tp.unique_name)
+            return reduce_scatter
+
+        def replacement(input: torch.Tensor, mat2: torch.Tensor,
+                        scale_a: torch.Tensor,
+                        scale_b: torch.Tensor) -> torch.Tensor:
+            gemm_rs = torch.ops.symm_mem.fused_scaled_matmul_reduce_scatter(
+                input,
+                mat2,
+                scale_a,
+                scale_b,
+                "avg",
+                scatter_dim=0,
+                out_dtype=self.dtype,
+                group_name=self.tp.device_group.group_name,
+            )
+
+            return gemm_rs
+
+        pm.register_replacement(pattern, replacement, self.get_inputs(),
+                                pm.fwd_only, pm_pass)
+
+
+class AllGatherScaledMMPattern(BasePattern):
+
+    def get_inputs(self):
+        x = torch.empty([8, 16], device=self.device, dtype=FP8_DTYPE)
+        weight = torch.empty([16, 16], device=self.device,
+                             dtype=FP8_DTYPE).contiguous().transpose(0, 1)
+
+        s1 = x.shape[0] * self.tp_size
+
+        scale_a = torch.empty([s1, 1], device=self.device, dtype=torch.float32)
+        scale_b = torch.empty([1, 16], device=self.device, dtype=torch.float32)
+
+        return [x, weight, scale_a, scale_b]
+
+    def register(self, pm_pass: PatternMatcherPass):
+
+        def pattern(
+            x: torch.Tensor,
+            weight: torch.Tensor,
+            scale_a: torch.Tensor,
+            scale_b: torch.Tensor,
+        ) -> torch.Tensor:
+            all_gather = torch.ops.vllm.all_gather.default(
+                x,
+                dim=0,
+                world_size=self.tp_size,
+                group_name=self.tp.unique_name)
+
+            return torch.ops.aten._scaled_mm.default(all_gather,
+                                                     mat2=weight,
+                                                     scale_a=scale_a,
+                                                     scale_b=scale_b,
+                                                     bias=None,
+                                                     scale_result=None,
+                                                     out_dtype=self.dtype)
+
+        def replacement(x: torch.Tensor, weight: torch.Tensor,
+                        scale_a: torch.Tensor,
+                        scale_b: torch.Tensor) -> torch.Tensor:
+            ag_output, mm_outputs = torch.ops.symm_mem.fused_all_gather_scaled_matmul(  # noqa
+                x,
+                [weight],
+                scale_a,
+                [scale_b],
+                gather_dim=0,
+                biases=[None],
+                result_scales=[None],
+                out_dtypes=[self.dtype],
+                use_fast_accum=[False],
+                group_name=self.tp.device_group.group_name,
+            )
+            return mm_outputs
+
+        pm.register_replacement(pattern, replacement, self.get_inputs(),
+                                pm.fwd_only, pm_pass)
+
+
+class CutlassScaledMMReduceScatterPattern(BasePattern):
+
+    def get_inputs(self):
+        input = torch.empty([16, 16], device=self.device, dtype=FP8_DTYPE)
+        mm_weight = torch.empty([16, 16], device=self.device,
+                                dtype=FP8_DTYPE).contiguous().transpose(0, 1)
+        scale_a = torch.empty([16, 1], device=self.device, dtype=torch.float32)
+        scale_b = torch.empty([1, 16], device=self.device, dtype=torch.float32)
+
+        cutlass_mm_output = torch.empty([16, 16],
+                                        device=self.device,
+                                        dtype=self.dtype)
+        return [input, mm_weight, scale_a, scale_b, cutlass_mm_output]
+
+    def register(self, pm_pass: PatternMatcherPass):
+
+        def pattern(input: torch.Tensor, weight: torch.Tensor,
+                    scale_a: torch.Tensor, scale_b: torch.Tensor,
+                    cutlass_mm_output: torch.Tensor) -> torch.Tensor:
+            cutlass_scaled_mm = torch.ops.higher_order.auto_functionalized(
+                torch.ops._C.cutlass_scaled_mm.default,
+                out=cutlass_mm_output,
+                a=input,
+                b=weight,
+                a_scales=scale_a,
+                b_scales=scale_b,
+                bias=None)
+
+            reduce_scatter = torch.ops.vllm.reduce_scatter.default(
+                cutlass_scaled_mm[1],
+                dim=0,
+                world_size=self.tp_size,
+                group_name=self.tp.unique_name)
+            return reduce_scatter
+
+        def replacement(input: torch.Tensor, mat2: torch.Tensor,
+                        scale_a: torch.Tensor, scale_b: torch.Tensor,
+                        cutlass_mm_output: torch.Tensor) -> torch.Tensor:
+            gemm_rs = torch.ops.symm_mem.fused_scaled_matmul_reduce_scatter(
+                input,
+                mat2,
+                scale_a,
+                scale_b,
+                "avg",
+                scatter_dim=0,
+                out_dtype=self.dtype,
+                group_name=self.tp.device_group.group_name,
+            )
+
+            return gemm_rs
+
+        pm.register_replacement(pattern, replacement, self.get_inputs(),
+                                pm.fwd_only, pm_pass)
+
+
+class AllGatherCutlassScaledMMPattern(BasePattern):
+
+    def get_inputs(self):
+        x = torch.empty([8, 16], device=self.device, dtype=FP8_DTYPE)
+        weight = torch.empty([16, 16], device=self.device,
+                             dtype=FP8_DTYPE).contiguous().transpose(0, 1)
+
+        s1 = x.shape[0] * self.tp_size
+
+        scale_a = torch.empty([s1, 1], device=self.device, dtype=torch.float32)
+        scale_b = torch.empty([1, 16], device=self.device, dtype=torch.float32)
+
+        s2 = weight.shape[1]
+        output = torch.empty([s1, s2], device=self.device, dtype=self.dtype)
+
+        return [x, weight, scale_a, scale_b, output]
+
+    def register(self, pm_pass: PatternMatcherPass):
+
+        def pattern(
+            x: torch.Tensor,
+            weight: torch.Tensor,
+            scale_a: torch.Tensor,
+            scale_b: torch.Tensor,
+            output: torch.Tensor,
+        ) -> torch.Tensor:
+            all_gather = torch.ops.vllm.all_gather.default(
+                x,
+                dim=0,
+                world_size=self.tp_size,
+                group_name=self.tp.unique_name)
+
+            cutlass_scaled_mm = torch.ops.higher_order.auto_functionalized(
+                torch.ops._C.cutlass_scaled_mm.default,
+                out=output,
+                a=all_gather,
+                b=weight,
+                a_scales=scale_a,
+                b_scales=scale_b,
+                bias=None)
+            return cutlass_scaled_mm[1]
+
+        def replacement(x: torch.Tensor, weight: torch.Tensor,
+                        scale_a: torch.Tensor, scale_b: torch.Tensor,
+                        output: torch.Tensor) -> torch.Tensor:
+            ag_output, mm_outputs = torch.ops.symm_mem.fused_all_gather_scaled_matmul(  # noqa
+                x,
+                [weight],
+                scale_a,
+                [scale_b],
+                gather_dim=0,
+                biases=[None],
+                result_scales=[None],
+                out_dtypes=[self.dtype],
+                use_fast_accum=[False],
+                group_name=self.tp.device_group.group_name,
+            )
+            return mm_outputs
+
+        pm.register_replacement(pattern, replacement, self.get_inputs(),
+                                pm.fwd_only, pm_pass)
+
+
 class AsyncTPPass(VllmInductorPass):
 
     def __init__(self, config: VllmConfig):
@@ -133,6 +359,20 @@ class AsyncTPPass(VllmInductorPass):
         AllGatherGEMMPattern(self.model_dtype,
                              self.device).register(self.patterns)
 
+        # These fusions are enabled only for bfloat16 models because
+        # `scaled_mm` or `cutlass_scaled_mm` with per-token (row-wise) scaling
+        # only supports bfloat16 as the output dtype.
+        if self.model_dtype == torch.bfloat16:
+            ScaledMMReduceScatterPattern(self.model_dtype,
+                                         self.device).register(self.patterns)
+            AllGatherScaledMMPattern(self.model_dtype,
+                                     self.device).register(self.patterns)
+
+            CutlassScaledMMReduceScatterPattern(
+                self.model_dtype, self.device).register(self.patterns)
+            AllGatherCutlassScaledMMPattern(
+                self.model_dtype, self.device).register(self.patterns)
+
     def is_applicable_for_shape(self, shape: Optional[int]) -> bool:
         # only do replace for specific shapes
         tp_size = get_tensor_model_parallel_world_size()
@@ -142,7 +382,7 @@ class AsyncTPPass(VllmInductorPass):
         self.begin()
         self.dump_graph(graph, "before_async_tp_pass")
         count = self.patterns.apply(graph)
-        logger.debug("Replaced %s patterns", count)
+        logger.debug("Replaced %s patterns with async TP pass.", count)
         self.dump_graph(graph, "after_async_tp_pass")
         self.end_and_log()
 
diff --git a/vllm/compilation/sequence_parallelism.py b/vllm/compilation/sequence_parallelism.py
index 6107046e40dcd..ebc025cba71ed 100644
--- a/vllm/compilation/sequence_parallelism.py
+++ b/vllm/compilation/sequence_parallelism.py
@@ -477,6 +477,6 @@ class SequenceParallelismPass(VllmInductorPass):
         self.begin()
         self.dump_graph(graph, "before_sequence_parallelism_pass")
         count = self.patterns.apply(graph)
-        logger.debug("Replaced %s patterns", count)
+        logger.debug("Replaced %s patterns with sequence parallelism", count)
         self.dump_graph(graph, "after_sequence_parallelism_pass")
         self.end_and_log()

From 601f856d5679a474b6488fb7dd75ebbd7125d1ca Mon Sep 17 00:00:00 2001
From: Bram <153647206+br4mm@users.noreply.github.com>
Date: Wed, 30 Jul 2025 14:44:02 -0700
Subject: [PATCH 078/224] [Bugfix] Fix None value handling in trace span
 creation for cancelled requests (#20272)

---
 vllm/engine/llm_engine.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 3f30a34170ffe..79255b031eeca 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1862,8 +1862,14 @@ class LLMEngine:
                 context=trace_context,
                 start_time=arrival_time_nano_seconds) as seq_span:
             metrics = seq_group.metrics
-            ttft = metrics.first_token_time - metrics.arrival_time
-            e2e_time = metrics.finished_time - metrics.arrival_time
+
+            # Handle potential None values for cancelled/aborted requests
+            ttft = (metrics.first_token_time - metrics.arrival_time
+                    if metrics.first_token_time is not None else None)
+
+            e2e_time = (metrics.finished_time - metrics.arrival_time
+                        if metrics.finished_time is not None else None)
+
             seq_span.set_attribute(SpanAttributes.GEN_AI_RESPONSE_MODEL,
                                    self.model_config.model)
             seq_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_ID,
@@ -1886,11 +1892,18 @@ class LLMEngine:
                     seq.get_output_len()
                     for seq in seq_group.get_finished_seqs()
                 ]))
-            seq_span.set_attribute(SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE,
-                                   metrics.time_in_queue)
-            seq_span.set_attribute(
-                SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN, ttft)
-            seq_span.set_attribute(SpanAttributes.GEN_AI_LATENCY_E2E, e2e_time)
+
+            # Only set timing attributes if the values are available
+            if metrics.time_in_queue is not None:
+                seq_span.set_attribute(
+                    SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE,
+                    metrics.time_in_queue)
+            if ttft is not None:
+                seq_span.set_attribute(
+                    SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN, ttft)
+            if e2e_time is not None:
+                seq_span.set_attribute(SpanAttributes.GEN_AI_LATENCY_E2E,
+                                       e2e_time)
             if metrics.scheduler_time is not None:
                 seq_span.set_attribute(
                     SpanAttributes.GEN_AI_LATENCY_TIME_IN_SCHEDULER,

From ca9e2be3ed6320b51f52f536595cd24e254f8bb2 Mon Sep 17 00:00:00 2001
From: Zebing Lin <linzebing1995@gmail.com>
Date: Wed, 30 Jul 2025 18:00:54 -0400
Subject: [PATCH 079/224] [Core] Move EngineCoreRequest to Request conversion
 out of EngineCore (#21627)

Signed-off-by: linzebing <linzebing1995@gmail.com>
---
 tests/v1/engine/test_engine_core.py | 44 ++++++++++-------
 vllm/v1/engine/core.py              | 74 ++++++++++++++++++-----------
 vllm/v1/engine/core_client.py       |  3 +-
 3 files changed, 73 insertions(+), 48 deletions(-)

diff --git a/tests/v1/engine/test_engine_core.py b/tests/v1/engine/test_engine_core.py
index eb826bf06236f..c52b98967126b 100644
--- a/tests/v1/engine/test_engine_core.py
+++ b/tests/v1/engine/test_engine_core.py
@@ -65,7 +65,8 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         """Test basic request lifecycle."""
 
         # First request.
-        engine_core.add_request(make_request())
+        engine_core.add_request(
+            *engine_core.preprocess_add_request(make_request()))
         assert len(engine_core.scheduler.waiting) == 1
         assert len(engine_core.scheduler.running) == 0
 
@@ -74,7 +75,8 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         assert len(engine_core.scheduler.running) == 1
 
         # Second request.
-        engine_core.add_request(make_request())
+        engine_core.add_request(
+            *engine_core.preprocess_add_request(make_request()))
         assert len(engine_core.scheduler.waiting) == 1
         assert len(engine_core.scheduler.running) == 1
 
@@ -83,8 +85,10 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         assert len(engine_core.scheduler.running) == 2
 
         # Add two requests in a row.
-        engine_core.add_request(make_request())
-        engine_core.add_request(make_request())
+        engine_core.add_request(
+            *engine_core.preprocess_add_request(make_request()))
+        engine_core.add_request(
+            *engine_core.preprocess_add_request(make_request()))
         assert len(engine_core.scheduler.waiting) == 2
         assert len(engine_core.scheduler.running) == 2
 
@@ -104,7 +108,7 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         req = make_request()
         request_id = req.request_id
 
-        engine_core.add_request(req)
+        engine_core.add_request(*engine_core.preprocess_add_request(req))
         assert len(engine_core.scheduler.waiting) == 1
         assert len(engine_core.scheduler.running) == 0
         assert engine_core.scheduler.has_unfinished_requests()
@@ -131,8 +135,8 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         req1 = make_request()
         req2 = make_request()
 
-        engine_core.add_request(req0)
-        engine_core.add_request(req1)
+        engine_core.add_request(*engine_core.preprocess_add_request(req0))
+        engine_core.add_request(*engine_core.preprocess_add_request(req1))
         assert len(engine_core.scheduler.waiting) == 2
         assert len(engine_core.scheduler.running) == 0
 
@@ -140,7 +144,7 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         assert len(engine_core.scheduler.waiting) == 0
         assert len(engine_core.scheduler.running) == 2
 
-        engine_core.add_request(req2)
+        engine_core.add_request(*engine_core.preprocess_add_request(req2))
         assert len(engine_core.scheduler.waiting) == 1
         assert len(engine_core.scheduler.running) == 2
 
@@ -166,12 +170,12 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         req0 = make_request()
         req1 = make_request()
         req0.request_id = req1.request_id = "test"
-        engine_core.add_request(req0)
+        engine_core.add_request(*engine_core.preprocess_add_request(req0))
 
         while (outs := engine_core.step()[0].get(0)) and outs.outputs:
             pass
 
-        engine_core.add_request(req1)
+        engine_core.add_request(*engine_core.preprocess_add_request(req1))
         while (outs := engine_core.step()[0].get(0)) and outs.outputs:
             pass
 
@@ -207,7 +211,7 @@ def test_engine_core_advanced_sampling(monkeypatch: pytest.MonkeyPatch):
             repetition_penalty=0.1,
             stop_token_ids=[1001, 1002],
         )
-        engine_core.add_request(request)
+        engine_core.add_request(*engine_core.preprocess_add_request(request))
 
         def _check_engine_state():
             assert len(engine_core.scheduler.waiting) == 1
@@ -226,7 +230,7 @@ def test_engine_core_advanced_sampling(monkeypatch: pytest.MonkeyPatch):
             top_p=0.99,
             top_k=50,
         )
-        engine_core.add_request(request2)
+        engine_core.add_request(*engine_core.preprocess_add_request(request2))
         _check_engine_state()
 
 
@@ -298,9 +302,9 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
 
         # Add two requests in a row. Each request have 12 prompt tokens.
         req0 = make_request_with_max_tokens("0", 5)
-        engine_core.add_request(req0)
+        engine_core.add_request(*engine_core.preprocess_add_request(req0))
         req1 = make_request_with_max_tokens("1", 5)
-        engine_core.add_request(req1)
+        engine_core.add_request(*engine_core.preprocess_add_request(req1))
 
         # Schedule Batch 1: (10, req0)
         assert engine_core.step_with_batch_queue()[0] is None
@@ -436,7 +440,8 @@ def test_engine_core_invalid_request_id_type(monkeypatch: pytest.MonkeyPatch):
 
         with pytest.raises(TypeError,
                            match="request_id must be a string, got.*UUID"):
-            engine_core.add_request(uuid_request)
+            engine_core.add_request(
+                *engine_core.preprocess_add_request(uuid_request))
 
         # Test with integer
         int_request = make_request()
@@ -444,7 +449,8 @@ def test_engine_core_invalid_request_id_type(monkeypatch: pytest.MonkeyPatch):
 
         with pytest.raises(TypeError,
                            match="request_id must be a string, got.*int"):
-            engine_core.add_request(int_request)
+            engine_core.add_request(
+                *engine_core.preprocess_add_request(int_request))
 
         # Test with None
         none_request = make_request()
@@ -452,10 +458,12 @@ def test_engine_core_invalid_request_id_type(monkeypatch: pytest.MonkeyPatch):
 
         with pytest.raises(TypeError,
                            match="request_id must be a string, got.*NoneType"):
-            engine_core.add_request(none_request)
+            engine_core.add_request(
+                *engine_core.preprocess_add_request(none_request))
 
         # Verify engine is still functional after errors
         valid_request = make_request()
-        engine_core.add_request(valid_request)
+        engine_core.add_request(
+            *engine_core.preprocess_add_request(valid_request))
         assert len(engine_core.scheduler.waiting) == 1
         assert len(engine_core.scheduler.running) == 0
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 9f2fca6961388..f9a6315df8af8 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -205,8 +205,12 @@ class EngineCore:
     def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
         return self.model_executor.supported_tasks
 
-    def add_request(self, request: EngineCoreRequest):
-        """Add request to the scheduler."""
+    def add_request(self, request: Request, request_wave: int = 0):
+        """Add request to the scheduler.
+
+        `request_wave`: indicates which wave of requests this request is
+        expected to belong to in the DP case.
+        """
         # Validate the request_id type.
         if not isinstance(request.request_id, str):
             raise TypeError(
@@ -222,27 +226,12 @@ class EngineCore:
                 raise ValueError(f"Unsupported task: {pooling_params.task!r} "
                                  f"Supported tasks: {supported_pooling_tasks}")
 
-        if request.mm_hashes is not None:
-            # Here, if hash exists for a multimodal input, then it will be
-            # fetched from the cache, else it will be added to the cache.
-            # Note that the cache here is mirrored with the client cache, so
-            # anything that has a hash must have a HIT cache entry here
-            # as well.
-            assert request.mm_inputs is not None
-            request.mm_inputs = self.mm_input_cache_server.get_and_update_p1(
-                request.mm_inputs, request.mm_hashes)
-
-        req = Request.from_engine_core_request(request)
-        if req.use_structured_output:
-            # Start grammar compilation asynchronously
-            self.structured_output_manager.grammar_init(req)
-
-        if req.kv_transfer_params is not None and (
+        if request.kv_transfer_params is not None and (
                 not self.scheduler.get_kv_connector()):
             logger.warning("Got kv_transfer_params, but no KVConnector found. "
                            "Disabling KVTransfer for this request.")
 
-        self.scheduler.add_request(req)
+        self.scheduler.add_request(request)
 
     def abort_requests(self, request_ids: list[str]):
         """Abort requests from the scheduler."""
@@ -414,6 +403,31 @@ class EngineCore:
         self.model_executor.save_tensorized_model(
             tensorizer_config=tensorizer_config, )
 
+    def preprocess_add_request(
+            self, request: EngineCoreRequest) -> tuple[Request, int]:
+        """Preprocess the request.
+
+        This function can be called directly from the input processing thread
+        so request initialization runs in parallel with the model forward pass.
+        """
+        if request.mm_hashes is not None:
+            assert request.mm_inputs is not None
+            # Note on thread safety: no race condition.
+            # `mm_input_cache_server` is reset at the end of LLMEngine init,
+            # and is only accessed in the input processing thread afterwards.
+            request.mm_inputs = self.mm_input_cache_server.get_and_update_p1(
+                request.mm_inputs, request.mm_hashes)
+
+        req = Request.from_engine_core_request(request)
+        if req.use_structured_output:
+            # Note on thread safety: no race condition.
+            # `grammar_init` is only invoked in input processing thread. For
+            # `structured_output_manager`, each request is independent and
+            # grammar compilation is async. Scheduler always checks grammar
+            # compilation status before scheduling request.
+            self.structured_output_manager.grammar_init(req)
+        return req, request.current_wave
+
 
 class EngineCoreProc(EngineCore):
     """ZMQ-wrapper for running EngineCore in background process."""
@@ -707,7 +721,8 @@ class EngineCoreProc(EngineCore):
         """Dispatch request from client."""
 
         if request_type == EngineCoreRequestType.ADD:
-            self.add_request(request)
+            req, request_wave = request
+            self.add_request(req, request_wave)
         elif request_type == EngineCoreRequestType.ABORT:
             self.abort_requests(request)
         elif request_type == EngineCoreRequestType.UTILITY:
@@ -806,10 +821,11 @@ class EngineCoreProc(EngineCore):
                         bytes(type_frame.buffer))
 
                     # Deserialize the request data.
-                    decoder = add_request_decoder if (
-                        request_type
-                        == EngineCoreRequestType.ADD) else generic_decoder
-                    request = decoder.decode(data_frames)
+                    if request_type == EngineCoreRequestType.ADD:
+                        request = add_request_decoder.decode(data_frames)
+                        request = self.preprocess_add_request(request)
+                    else:
+                        request = generic_decoder.decode(data_frames)
 
                     # Push to input queue for core busy loop.
                     self.input_queue.put_nowait((request_type, request))
@@ -939,17 +955,17 @@ class DPEngineCoreProc(EngineCoreProc):
         if dp_group := getattr(self, "dp_group", None):
             stateless_destroy_torch_distributed_process_group(dp_group)
 
-    def add_request(self, request: EngineCoreRequest):
-        if self.has_coordinator and request.current_wave != self.current_wave:
-            if request.current_wave > self.current_wave:
-                self.current_wave = request.current_wave
+    def add_request(self, request: Request, request_wave: int = 0):
+        if self.has_coordinator and request_wave != self.current_wave:
+            if request_wave > self.current_wave:
+                self.current_wave = request_wave
             elif not self.engines_running:
                 # Request received for an already-completed wave, notify
                 # front-end that we need to start the next one.
                 self.output_queue.put_nowait(
                     (-1, EngineCoreOutputs(start_wave=self.current_wave)))
 
-        super().add_request(request)
+        super().add_request(request, request_wave)
 
     def _handle_client_request(self, request_type: EngineCoreRequestType,
                                request: Any) -> None:
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index fdf5a5de191c0..26985df6f62df 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -250,7 +250,8 @@ class InprocClient(EngineCoreClient):
         return self.engine_core.get_supported_tasks()
 
     def add_request(self, request: EngineCoreRequest) -> None:
-        self.engine_core.add_request(request)
+        req, request_wave = self.engine_core.preprocess_add_request(request)
+        self.engine_core.add_request(req, request_wave)
 
     def abort_requests(self, request_ids: list[str]) -> None:
         if len(request_ids) > 0:

From 9cb497bfa346721aaf5e09a7f483764a1a54f8b4 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Wed, 30 Jul 2025 20:39:46 -0400
Subject: [PATCH 080/224] [Example] Add `async_llm_streaming.py` example for
 AsyncLLM streaming in python (#21763)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 .../offline_inference/async_llm_streaming.py  | 111 ++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 examples/offline_inference/async_llm_streaming.py

diff --git a/examples/offline_inference/async_llm_streaming.py b/examples/offline_inference/async_llm_streaming.py
new file mode 100644
index 0000000000000..b876d536e3a19
--- /dev/null
+++ b/examples/offline_inference/async_llm_streaming.py
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Simple example demonstrating streaming offline inference with AsyncLLM (V1 engine).
+
+This script shows the core functionality of vLLM's AsyncLLM engine for streaming
+token-by-token output in offline inference scenarios. It demonstrates DELTA mode
+streaming where you receive new tokens as they are generated.
+
+Usage:
+    python examples/offline_inference/async_llm_streaming.py
+"""
+
+import asyncio
+
+from vllm import SamplingParams
+from vllm.engine.arg_utils import AsyncEngineArgs
+from vllm.sampling_params import RequestOutputKind
+from vllm.v1.engine.async_llm import AsyncLLM
+
+
+async def stream_response(engine: AsyncLLM, prompt: str, request_id: str) -> None:
+    """
+    Stream response from AsyncLLM and display tokens as they arrive.
+
+    This function demonstrates the core streaming pattern:
+    1. Create SamplingParams with DELTA output kind
+    2. Call engine.generate() and iterate over the async generator
+    3. Print new tokens as they arrive
+    4. Handle the finished flag to know when generation is complete
+    """
+    print(f"\n🚀 Prompt: {prompt!r}")
+    print("💬 Response: ", end="", flush=True)
+
+    # Configure sampling parameters for streaming
+    sampling_params = SamplingParams(
+        max_tokens=100,
+        temperature=0.8,
+        top_p=0.95,
+        seed=42,  # For reproducible results
+        output_kind=RequestOutputKind.DELTA,  # Get only new tokens each iteration
+    )
+
+    try:
+        # Stream tokens from AsyncLLM
+        async for output in engine.generate(
+            request_id=request_id, prompt=prompt, sampling_params=sampling_params
+        ):
+            # Process each completion in the output
+            for completion in output.outputs:
+                # In DELTA mode, we get only new tokens generated since last iteration
+                new_text = completion.text
+                if new_text:
+                    print(new_text, end="", flush=True)
+
+            # Check if generation is finished
+            if output.finished:
+                print("\n✅ Generation complete!")
+                break
+
+    except Exception as e:
+        print(f"\n❌ Error during streaming: {e}")
+        raise
+
+
+async def main():
+    print("🔧 Initializing AsyncLLM...")
+
+    # Create AsyncLLM engine with simple configuration
+    engine_args = AsyncEngineArgs(
+        model="meta-llama/Llama-3.2-1B-Instruct",
+        enforce_eager=True,  # Faster startup for examples
+    )
+    engine = AsyncLLM.from_engine_args(engine_args)
+
+    try:
+        # Example prompts to demonstrate streaming
+        prompts = [
+            "The future of artificial intelligence is",
+            "In a galaxy far, far away",
+            "The key to happiness is",
+        ]
+
+        print(f"🎯 Running {len(prompts)} streaming examples...")
+
+        # Process each prompt
+        for i, prompt in enumerate(prompts, 1):
+            print(f"\n{'=' * 60}")
+            print(f"Example {i}/{len(prompts)}")
+            print(f"{'=' * 60}")
+
+            request_id = f"stream-example-{i}"
+            await stream_response(engine, prompt, request_id)
+
+            # Brief pause between examples
+            if i < len(prompts):
+                await asyncio.sleep(0.5)
+
+        print("\n🎉 All streaming examples completed!")
+
+    finally:
+        # Always clean up the engine
+        print("🔧 Shutting down engine...")
+        engine.shutdown()
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        print("\n🛑 Interrupted by user")

From ec02e536dfa46c7e8785cb5aaf5dd4eaad88f405 Mon Sep 17 00:00:00 2001
From: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
Date: Thu, 31 Jul 2025 04:38:52 +0100
Subject: [PATCH 081/224] [Bugfix] Relax lang pin for voxtral (#21833)

Signed-off-by: Sanchit Gandhi <sgandhi3141@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 vllm/entrypoints/openai/speech_to_text.py |  8 +--
 vllm/model_executor/models/interfaces.py  | 53 ++++++++++++++--
 vllm/model_executor/models/voxtral.py     | 25 +++++---
 vllm/model_executor/models/whisper.py     | 74 +++++------------------
 4 files changed, 80 insertions(+), 80 deletions(-)

diff --git a/vllm/entrypoints/openai/speech_to_text.py b/vllm/entrypoints/openai/speech_to_text.py
index c2227a21a4b9a..01140a4bfea7e 100644
--- a/vllm/entrypoints/openai/speech_to_text.py
+++ b/vllm/entrypoints/openai/speech_to_text.py
@@ -86,11 +86,7 @@ class OpenAISpeechToText(OpenAIServing):
         audio_data: bytes,
     ) -> tuple[list[PromptType], float]:
         # Validate request
-        # TODO language should be optional and can be guessed.
-        # For now we default to en. See
-        # https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/generation_whisper.py#L1520
-        lang = request.language or "en"
-        self.model_cls.validate_language(lang)
+        language = self.model_cls.validate_language(request.language)
 
         if len(audio_data) / 1024**2 > self.max_audio_filesize_mb:
             raise ValueError("Maximum file size exceeded.")
@@ -112,7 +108,7 @@ class OpenAISpeechToText(OpenAIServing):
                 audio=chunk,
                 stt_config=self.asr_config,
                 model_config=self.model_config,
-                language=lang,
+                language=language,
                 task_type=self.task_type,
                 request_prompt=request.prompt)
             prompts.append(prompt)
diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py
index 957b57276b4ca..b6d9877cd01b6 100644
--- a/vllm/model_executor/models/interfaces.py
+++ b/vllm/model_executor/models/interfaces.py
@@ -1,13 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from collections.abc import Iterable, MutableSequence
+from collections.abc import Iterable, Mapping, MutableSequence
 from typing import (TYPE_CHECKING, ClassVar, Literal, Optional, Protocol,
                     Union, overload, runtime_checkable)
 
 import numpy as np
 import torch
 from torch import Tensor
+from transformers.models.whisper.tokenization_whisper import LANGUAGES
 from typing_extensions import Self, TypeIs
 
 from vllm.config import ModelConfig, SpeechToTextConfig
@@ -685,6 +686,8 @@ class SupportsQuant:
 @runtime_checkable
 class SupportsTranscription(Protocol):
     """The interface required for all models that support transcription."""
+    # Mapping from ISO 639-1 language codes to language names
+    supported_languages: ClassVar[Mapping[str, str]]
 
     supports_transcription: ClassVar[Literal[True]] = True
 
@@ -694,11 +697,22 @@ class SupportsTranscription(Protocol):
     `True`.
     """
 
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        # Fail at class-definition time if supported_languages lists any
+        # code that is missing from the full Whisper language map.
+        invalid = set(cls.supported_languages) - set(LANGUAGES.keys())
+        if invalid:
+            raise ValueError(
+                f"{cls.__name__}.supported_languages contains invalid "
+                f"language codes: {sorted(invalid)}.\n"
+                f"Valid choices are: {sorted(LANGUAGES.keys())}")
+
     @classmethod
     def get_generation_prompt(cls, audio: np.ndarray,
                               stt_config: SpeechToTextConfig,
-                              model_config: ModelConfig, language: str,
-                              task_type: str,
+                              model_config: ModelConfig,
+                              language: Optional[str], task_type: str,
                               request_prompt: str) -> PromptType:
         """Get the prompt for the ASR model.
         The model has control over the construction, as long as it
@@ -706,9 +720,36 @@ class SupportsTranscription(Protocol):
         ...
 
     @classmethod
-    def validate_language(cls, language: str) -> bool:
-        """Check if the model supports a specific ISO639_1 language."""
-        ...
+    def get_other_languages(cls) -> Mapping[str, str]:
+        # other possible language codes from the whisper map
+        return {
+            k: v
+            for k, v in LANGUAGES.items() if k not in cls.supported_languages
+        }
+
+    @classmethod
+    def validate_language(cls, language: Optional[str]) -> Optional[str]:
+        """
+        Ensure the language specified in the transcription request 
+        is a valid ISO 639-1 language code. If the request language is 
+        valid, but not natively supported by the model, trigger a 
+        warning (but not an exception).
+        """
+        if language is None or language in cls.supported_languages:
+            return language
+        elif language in cls.get_other_languages():
+            logger.warning(
+                "Language %r is not natively supported by %s; "
+                "results may be less accurate. Supported languages: %r",
+                language,
+                cls.__name__,
+                list(cls.supported_languages.keys()),
+            )
+            return language
+        else:
+            raise ValueError(
+                f"Unsupported language: {language!r}.  Must be one of "
+                f"{list(cls.supported_languages.keys())}.")
 
     @classmethod
     def get_speech_to_text_config(
diff --git a/vllm/model_executor/models/voxtral.py b/vllm/model_executor/models/voxtral.py
index 97cab628317e4..6b06c0ac6683f 100644
--- a/vllm/model_executor/models/voxtral.py
+++ b/vllm/model_executor/models/voxtral.py
@@ -26,8 +26,7 @@ from vllm.logger import init_logger
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models import SupportsPP
 # yapf: disable
-from vllm.model_executor.models.whisper import (
-    WhisperEncoder, WhisperForConditionalGeneration)
+from vllm.model_executor.models.whisper import WhisperEncoder
 # yapf: enable
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
@@ -50,6 +49,18 @@ from .utils import (flatten_bn, init_vllm_registered_model, maybe_prefix,
 
 logger = init_logger(__name__)
 
+ISO639_1_SUPPORTED_LANGS = {
+    "ar": "Arabic",
+    "nl": "Dutch",
+    "en": "English",
+    "fr": "French",
+    "de": "German",
+    "hi": "Hindi",
+    "it": "Italian",
+    "pt": "Portuguese",
+    "es": "Spanish",
+}
+
 
 class VoxtralProcessorAdapter:
     """
@@ -301,6 +312,7 @@ class VoxtralMultiModalProcessor(BaseMultiModalProcessor[VoxtralProcessingInfo]
                                         dummy_inputs=VoxtralDummyInputsBuilder)
 class VoxtralForConditionalGeneration(nn.Module, SupportsMultiModal,
                                       SupportsPP, SupportsTranscription):
+    supported_languages = ISO639_1_SUPPORTED_LANGS
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -441,8 +453,8 @@ class VoxtralForConditionalGeneration(nn.Module, SupportsMultiModal,
     # for speech-to-text transcription
     def get_generation_prompt(cls, audio: np.ndarray,
                               model_config: ModelConfig,
-                              stt_config: SpeechToTextConfig, language: str,
-                              task_type: str,
+                              stt_config: SpeechToTextConfig,
+                              language: Optional[str], task_type: str,
                               request_prompt: str) -> PromptType:
         tokenizer = cached_tokenizer_from_config(model_config)
         audio = Audio(audio, int(stt_config.sample_rate),
@@ -457,11 +469,6 @@ class VoxtralForConditionalGeneration(nn.Module, SupportsMultiModal,
         prompts_dict["prompt_token_ids"] = tokenized.tokens
         return cast(PromptType, prompts_dict)
 
-    @classmethod
-    def validate_language(cls, language: str) -> bool:
-        # same as whisper
-        return WhisperForConditionalGeneration.validate_language(language)
-
     @classmethod
     def get_num_audio_tokens(cls, audio_duration_s: float,
                              stt_config: SpeechToTextConfig,
diff --git a/vllm/model_executor/models/whisper.py b/vllm/model_executor/models/whisper.py
index d98dab5fac0e4..d7bafb9ef84d9 100644
--- a/vllm/model_executor/models/whisper.py
+++ b/vllm/model_executor/models/whisper.py
@@ -109,51 +109,6 @@ ISO639_1_SUPPORTED_LANGS = {
     "vi": "Vietnamese",
     "cy": "Welsh"
 }
-ISO639_1_OTHER_LANGS = {
-    "lo": "Lao",
-    "jw": "Javanese",
-    "tk": "Turkmen",
-    "yi": "Yiddish",
-    "so": "Somali",
-    "bn": "Bengali",
-    "nn": "Norwegian Nynorsk",
-    "si": "Sinhala",
-    "yo": "Yoruba",
-    "sa": "Sanskrit",
-    "mi": "Māori",
-    "fo": "Faroese",  # codespell:ignore
-    "mt": "Maltese",
-    "tg": "Tajik",
-    "mg": "Malagasy",
-    "haw": "Hawaiian",
-    "km": "Khmer",
-    "br": "Breton",
-    "ps": "Pashto",
-    "ln": "Lingala",
-    "la": "Latin",
-    "ml": "Malayalam",
-    "sq": "Albanian",
-    "su": "Sundanese",
-    "eu": "Basque",
-    "ka": "Georgian",
-    "uz": "Uzbek",
-    "sn": "Shona",
-    "ht": "Haitian",
-    "as": "Assamese",
-    "mn": "Mongolian",
-    "te": "Telugu",
-    "pa": "Panjabi",
-    "tt": "Tatar",
-    "gu": "Gujarati",
-    "oc": "Occitan",
-    "ha": "Hausa",
-    "ba": "Bashkir",
-    "my": "Burmese",
-    "sd": "Sindhi",
-    "am": "Amharic",
-    "lb": "Luxembourgish",
-    "bo": "Tibetan"
-}
 
 
 class WhisperAudioInputs(TypedDict):
@@ -807,22 +762,20 @@ class WhisperForConditionalGeneration(nn.Module, SupportsTranscription,
 
     # Whisper only supports audio-conditioned generation.
     supports_transcription_only = True
+    supported_languages = ISO639_1_SUPPORTED_LANGS
 
     @classmethod
-    def validate_language(cls, language: str) -> bool:
-        if language in ISO639_1_SUPPORTED_LANGS:
-            return True
-        elif language in ISO639_1_OTHER_LANGS:
+    def validate_language(cls, language: Optional[str]) -> Optional[str]:
+        if language is None:
+            # TODO language should be optional and can be guessed.
+            # For now we default to en. See
+            # https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/generation_whisper.py#L1520
             logger.warning(
-                "The selected language %s has limited accuracy with"
-                " reported WER>=0.5. Results may be less accurate "
-                "for this choice.", language)
-            return True
-        else:
-            raise ValueError(f"Unsupported language: {language}."
-                             "Language should be one of:" +
-                             f" {list(ISO639_1_SUPPORTED_LANGS.values())}" +
-                             f"or {list(ISO639_1_OTHER_LANGS.values())}")
+                "Defaulting to language='en'. If you wish to transcribe "
+                "audio in a different language, pass the `language` field "
+                "in the TranscriptionRequest.")
+            language = "en"
+        return super().validate_language(language)
 
     @classmethod
     def get_generation_prompt(
@@ -830,9 +783,12 @@ class WhisperForConditionalGeneration(nn.Module, SupportsTranscription,
             audio: np.ndarray,
             model_config: ModelConfig,  # not needed here
             stt_config: SpeechToTextConfig,
-            language: str,
+            language: Optional[str],
             task_type: str,
             request_prompt: str) -> PromptType:
+        if language is None:
+            raise ValueError(
+                "Language must be specified when creating the Whisper prompt")
         prompt = {
             "encoder_prompt": {
                 # Whisper does not support encoder prompt.

From 61445453df8e514d9ddf4d6bd3f9063f120cdac5 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Wed, 30 Jul 2025 23:40:34 -0400
Subject: [PATCH 082/224] [UX] Rename CUTLASS_MLA_VLLM_V1 to CUTLASS_MLA
 (#21966)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 vllm/engine/arg_utils.py                      |  2 +-
 vllm/platforms/cuda.py                        | 10 +++++-----
 vllm/platforms/interface.py                   |  2 +-
 vllm/v1/attention/backends/mla/cutlass_mla.py |  2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index ababa49a53ae4..c36c79c69317e 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1417,7 +1417,7 @@ class EngineArgs:
             "PALLAS_VLLM_V1",
             "TRITON_ATTN_VLLM_V1",
             "TRITON_MLA",
-            "CUTLASS_MLA_VLLM_V1",
+            "CUTLASS_MLA",
             "FLASHMLA",
             "FLASHINFER",
             "FLASHINFER_VLLM_V1",
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index c35d22c1d6824..87ff6b385809a 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -162,7 +162,7 @@ class CudaPlatformBase(Platform):
                 if cls.is_device_capability(100):
                     # Blackwell => Force CutlassMLA.
                     use_cutlass_mla = True
-                    envs.VLLM_ATTENTION_BACKEND = "CUTLASS_MLA_VLLM_V1"
+                    envs.VLLM_ATTENTION_BACKEND = "CUTLASS_MLA"
                 else:
                     # Not Blackwell
                     use_flashmla = True
@@ -170,7 +170,7 @@ class CudaPlatformBase(Platform):
                 # Forced case
                 use_flashmla = (envs.VLLM_ATTENTION_BACKEND == "FLASHMLA")
                 use_cutlass_mla = (
-                    envs.VLLM_ATTENTION_BACKEND == "CUTLASS_MLA_VLLM_V1")
+                    envs.VLLM_ATTENTION_BACKEND == "CUTLASS_MLA")
 
             from vllm.attention.ops.flashmla import is_flashmla_supported
             if use_flashmla and is_flashmla_supported()[0] \
@@ -182,7 +182,7 @@ class CudaPlatformBase(Platform):
             if use_cutlass_mla and cache_config.block_size != 128:
                 cache_config.block_size = 128
                 logger.info("Forcing kv cache block size to 128 for "
-                            "CUTLASS_MLA_VLLM_V1 backend.")
+                            "CUTLASS_MLA backend.")
 
         compilation_config = vllm_config.compilation_config
         if (envs.VLLM_ALL2ALL_BACKEND == "deepep_high_throughput"
@@ -211,9 +211,9 @@ class CudaPlatformBase(Platform):
                              kv_cache_dtype, block_size, use_v1,
                              use_mla) -> str:
         if use_mla:
-            # TODO(lucas): refactor to  be more concise
+            # TODO(lucas): refactor to be more concise
             #  we should probably consider factoring out V1 here
-            if selected_backend == _Backend.CUTLASS_MLA_VLLM_V1:
+            if selected_backend == _Backend.CUTLASS_MLA:
                 if use_v1:
                     logger.info_once("Using Cutlass MLA backend on V1 engine.")
                     return ("vllm.v1.attention.backends.mla."
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 02cc392244bac..6bae0fe25c797 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -53,7 +53,7 @@ class _Backend(enum.Enum):
     TRITON_MLA_VLLM_V1 = enum.auto()
     FLASHMLA_VLLM_V1 = enum.auto()
     FLASHMLA = enum.auto()  # Supported by V1
-    CUTLASS_MLA_VLLM_V1 = enum.auto()
+    CUTLASS_MLA = enum.auto()
     PALLAS = enum.auto()
     PALLAS_VLLM_V1 = enum.auto()
     IPEX = enum.auto()
diff --git a/vllm/v1/attention/backends/mla/cutlass_mla.py b/vllm/v1/attention/backends/mla/cutlass_mla.py
index c787f25cd3adf..b23a8f0a5e870 100644
--- a/vllm/v1/attention/backends/mla/cutlass_mla.py
+++ b/vllm/v1/attention/backends/mla/cutlass_mla.py
@@ -21,7 +21,7 @@ class CutlassMLABackend(MLACommonBackend):
 
     @staticmethod
     def get_name() -> str:
-        return "CUTLASS_MLA_VLLM_V1"
+        return "CUTLASS_MLA"
 
     @staticmethod
     def get_impl_cls() -> type["CutlassMLAImpl"]:

From 0f7919fca05d7cf60b773da26d898b72bc07a089 Mon Sep 17 00:00:00 2001
From: Jee Jee Li <pandaleefree@gmail.com>
Date: Thu, 31 Jul 2025 11:41:12 +0800
Subject: [PATCH 083/224] [Misc] Expand SUPPORTED_HIDDEN_SIZES for DeepEP
 low-latency kernels (#21818)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
---
 .../layers/fused_moe/deepep_ll_prepare_finalize.py              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py
index 57871ca250ae3..cfc2bdcf02408 100644
--- a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py
@@ -40,7 +40,7 @@ class DeepEPLLPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
 
     # DeepEP low-latency kernels are compiled only for certain
     # specific hidden sizes.
-    SUPPORTED_HIDDEN_SIZES = [2048, 2560, 4096, 5120, 7168]
+    SUPPORTED_HIDDEN_SIZES = [2048, 2560, 4096, 5120, 6144, 7168]
 
     def __init__(self,
                  buffer: deep_ep.Buffer,

From 055bd3978ededea015fb8f0cb6aa3cc48d84cde8 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Wed, 30 Jul 2025 23:45:29 -0400
Subject: [PATCH 084/224] [CI Bugfix] Fix CI OOM for
 `test_shared_storage_connector_hashes` (#21973)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 tests/v1/kv_connector/unit/test_shared_storage_connector.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/v1/kv_connector/unit/test_shared_storage_connector.py b/tests/v1/kv_connector/unit/test_shared_storage_connector.py
index ee3e71d3b8452..11b7e378441a4 100644
--- a/tests/v1/kv_connector/unit/test_shared_storage_connector.py
+++ b/tests/v1/kv_connector/unit/test_shared_storage_connector.py
@@ -10,7 +10,7 @@ from vllm.assets.image import ImageAsset
 from vllm.config import KVTransferConfig
 from vllm.multimodal.utils import encode_image_base64
 
-MODEL_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
+MODEL_NAME = "RedHatAI/Qwen2.5-VL-3B-Instruct-quantized.w4a16"
 
 SAMPLING_PARAMS = SamplingParams(temperature=0.0, top_k=1, max_tokens=128)
 
@@ -130,6 +130,8 @@ def test_shared_storage_connector_hashes(tmp_path):
         model=MODEL_NAME,
         max_model_len=8192,
         max_num_seqs=1,
+        gpu_memory_utilization=0.4,
+        enforce_eager=True,
         kv_transfer_config=kv_transfer_config,
         limit_mm_per_prompt={"image": 2},
     )

From 3e36fcbee642f41278a4881c9e2bfbbd7c28e607 Mon Sep 17 00:00:00 2001
From: Ning Xie <andy.xning@gmail.com>
Date: Thu, 31 Jul 2025 14:22:11 +0800
Subject: [PATCH 085/224] [Bugfix]: fix metadata file copy in
 test_sharded_state_loader (#21830)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
---
 tests/test_sharded_state_loader.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_sharded_state_loader.py b/tests/test_sharded_state_loader.py
index 64706defb5960..1bb4203d21c3e 100644
--- a/tests/test_sharded_state_loader.py
+++ b/tests/test_sharded_state_loader.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import fnmatch
 import multiprocessing as mp
 import os
 import shutil
@@ -64,9 +65,10 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
     # Copy metadata files to output directory
     for file in os.listdir(input_dir):
         if os.path.isdir(os.path.join(input_dir, file)):
-            continue
-        if not any(file.endswith(ext) for ext in weights_patterns):
-            shutil.copy(f"{input_dir}/{file}", output_dir)
+            shutil.copytree(os.path.join(input_dir, file),
+                            os.path.join(output_dir, file))
+        elif not any(fnmatch.fnmatch(file, ext) for ext in weights_patterns):
+            shutil.copy(os.path.join(input_dir, file), output_dir)
 
 
 def _run_generate(input_dir, queue: mp.Queue, **kwargs):

From 9532a6d5631bbf906f992806379516ed569c447d Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Thu, 31 Jul 2025 14:46:38 +0800
Subject: [PATCH 086/224] [Deprecation] Remove deprecated args and methods
 (#21907)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/entrypoints/chat_utils.py     | 32 ++++--------------------------
 vllm/multimodal/registry.py        | 25 -----------------------
 vllm/worker/neuron_model_runner.py |  7 +------
 3 files changed, 5 insertions(+), 59 deletions(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index a6602391d4081..6485ed6b148b4 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -48,7 +48,7 @@ from vllm.transformers_utils.chat_templates import (
 # yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import deprecate_kwargs, random_uuid
+from vllm.utils import random_uuid
 
 logger = init_logger(__name__)
 
@@ -383,17 +383,12 @@ def resolve_mistral_chat_template(
     return None
 
 
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
     *,
     model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> Optional[str]:
     # 1st priority: The given chat template
     if chat_template is not None:
@@ -488,10 +483,6 @@ def _log_chat_template_content_format(
         )
 
 
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_chat_template_content_format(
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
@@ -499,7 +490,6 @@ def resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
     *,
     model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> _ChatTemplateContentFormat:
     if given_format != "auto":
         return given_format
@@ -568,17 +558,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
 
         input_modality = modality.replace("_embeds", "")
 
-        if mm_registry.has_processor(model_config):
-            mm_processor = mm_registry.create_processor(model_config)
-            allowed_counts = mm_processor.info.get_allowed_mm_limits()
-            allowed_count = allowed_counts.get(input_modality, 0)
-        else:
-            mm_config = model_config.multimodal_config
-            if mm_config is None:
-                msg = "This model does not support multi-modal inputs"
-                raise ValueError(msg)
-
-            allowed_count = mm_config.get_limit_per_prompt(input_modality)
+        mm_processor = mm_registry.create_processor(model_config)
+        allowed_counts = mm_processor.info.get_allowed_mm_limits()
+        allowed_count = allowed_counts.get(input_modality, 0)
 
         current_count = len(self._items_by_modality[modality]) + 1
         if current_count > allowed_count:
@@ -1285,10 +1267,6 @@ def parse_chat_messages_futures(
     return conversation, mm_tracker.all_mm_data()
 
 
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def apply_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     conversation: list[ConversationMessage],
@@ -1297,8 +1275,6 @@ def apply_hf_chat_template(
     *,
     model_config: ModelConfig,
     tokenize: bool = False,  # Different from HF's default
-    # Deprecated, explicitly capture here so it doesn't slit into kwargs.
-    trust_remote_code: Optional[bool] = None,
     **kwargs: Any,
 ) -> str:
     hf_chat_template = resolve_hf_chat_template(
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index bfa391829d290..5f5b620e0cf79 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -5,7 +5,6 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Generic, Optional, Protocol, TypeVar
 
 import torch.nn as nn
-from typing_extensions import deprecated
 
 from vllm.envs import VLLM_MM_INPUT_CACHE_GIB
 from vllm.inputs import InputProcessingContext
@@ -105,13 +104,6 @@ class MultiModalRegistry:
 
         return True  # Success
 
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def create_input_mapper(self, model_config: "ModelConfig"):
-        return lambda data, mm_processor_kwargs: data
-
     def get_max_tokens_per_item_by_modality(
         self,
         model_config: "ModelConfig",
@@ -182,16 +174,6 @@ class MultiModalRegistry:
         """
         return sum(self.get_max_tokens_by_modality(model_config).values())
 
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def init_mm_limits_per_prompt(
-        self,
-        model_config: "ModelConfig",
-    ) -> None:
-        pass
-
     def get_mm_limits_per_prompt(
         self,
         model_config: "ModelConfig",
@@ -246,13 +228,6 @@ class MultiModalRegistry:
         model_cls, _ = get_model_architecture(model_config)
         return model_cls
 
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def has_processor(self, model_config: "ModelConfig") -> bool:
-        return True
-
     def create_processor(
         self,
         model_config: "ModelConfig",
diff --git a/vllm/worker/neuron_model_runner.py b/vllm/worker/neuron_model_runner.py
index 7ccf1a2c0a876..8317b9abff0cd 100644
--- a/vllm/worker/neuron_model_runner.py
+++ b/vllm/worker/neuron_model_runner.py
@@ -15,8 +15,7 @@ from vllm.lora.request import LoRARequest
 from vllm.model_executor import SamplingMetadata
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.model_executor.model_loader.neuron import get_neuron_model
-from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
-                             MultiModalKwargs)
+from vllm.multimodal import BatchedTensorInputs, MultiModalKwargs
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
@@ -88,10 +87,6 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
         self.device = self.device_config.device
         self.pin_memory = is_pin_memory_available()
 
-        # Multi-modal data support
-        self.multi_modal_input_mapper = MULTIMODAL_REGISTRY \
-            .create_input_mapper(self.model_config)
-
         # Lazy initialization.
         self.model: nn.Module  # initialize after load_model.
 

From d2aab336ad7822efe7cfc345fa3ad67d6f5cbe39 Mon Sep 17 00:00:00 2001
From: Daniele <36171005+dtrifiro@users.noreply.github.com>
Date: Thu, 31 Jul 2025 09:00:08 +0200
Subject: [PATCH 087/224] [CI/Build] get rid of unused VLLM_FA_CMAKE_GPU_ARCHES
 (#21599)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com>
---
 .buildkite/scripts/hardware_ci/run-gh200-test.sh | 3 +--
 .github/workflows/scripts/build.sh               | 1 -
 docker/Dockerfile                                | 3 ---
 docker/Dockerfile.nightly_torch                  | 3 ---
 docs/deployment/docker.md                        | 3 +--
 5 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/.buildkite/scripts/hardware_ci/run-gh200-test.sh b/.buildkite/scripts/hardware_ci/run-gh200-test.sh
index 8c64e14606d3b..f69e4b06680f5 100644
--- a/.buildkite/scripts/hardware_ci/run-gh200-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-gh200-test.sh
@@ -16,8 +16,7 @@ DOCKER_BUILDKIT=1 docker build . \
   --build-arg max_jobs=66 \
   --build-arg nvcc_threads=2 \
   --build-arg RUN_WHEEL_CHECK=false \
-  --build-arg torch_cuda_arch_list="9.0+PTX" \
-  --build-arg vllm_fa_cmake_gpu_arches="90-real"
+  --build-arg torch_cuda_arch_list="9.0+PTX"
 
 # Setup cleanup
 remove_docker_container() { docker rm -f gh200-test || true; }
diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
index 0f010832b465d..c69ebbb42da5a 100644
--- a/.github/workflows/scripts/build.sh
+++ b/.github/workflows/scripts/build.sh
@@ -15,7 +15,6 @@ $python_executable -m pip install -r requirements/build.txt -r requirements/cuda
 export MAX_JOBS=1
 # Make sure release wheels are built for the following architectures
 export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
-export VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
 
 bash tools/check_repo.sh
 
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 75b5ab0230c87..43522ef8fb8dd 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -164,9 +164,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # see https://github.com/pytorch/pytorch/pull/123243
 ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
-# Override the arch list for flash-attn to reduce the binary size
-ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
-ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
 #################### BASE BUILD IMAGE ####################
 
 #################### WHEEL BUILD IMAGE ####################
diff --git a/docker/Dockerfile.nightly_torch b/docker/Dockerfile.nightly_torch
index 8d43de77aad59..e147b97f0e056 100644
--- a/docker/Dockerfile.nightly_torch
+++ b/docker/Dockerfile.nightly_torch
@@ -114,9 +114,6 @@ RUN cat torch_build_versions.txt
 # explicitly set the list to avoid issues with torch 2.2
 # see https://github.com/pytorch/pytorch/pull/123243
 
-# Override the arch list for flash-attn to reduce the binary size
-ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
-ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
 #################### BASE BUILD IMAGE ####################
 
 #################### WHEEL BUILD IMAGE ####################
diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md
index 5f6cfcb00a37a..1f19f2fecfab1 100644
--- a/docs/deployment/docker.md
+++ b/docs/deployment/docker.md
@@ -106,8 +106,7 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `--
     -t vllm/vllm-gh200-openai:latest \
     --build-arg max_jobs=66 \
     --build-arg nvcc_threads=2 \
-    --build-arg torch_cuda_arch_list="9.0 10.0+PTX" \
-    --build-arg vllm_fa_cmake_gpu_arches="90-real"
+    --build-arg torch_cuda_arch_list="9.0 10.0+PTX"
     ```
 
 !!! note

From 2836dd73f13015ee386c544760ca0d16888203f3 Mon Sep 17 00:00:00 2001
From: "wang.yuqi" <noooop@126.com>
Date: Thu, 31 Jul 2025 16:51:15 +0800
Subject: [PATCH 088/224] [Model][CI] Let more pooling models support v1
 (#21747)

Signed-off-by: wang.yuqi <noooop@126.com>
---
 .../language/pooling/test_classification.py    |  8 --------
 tests/models/language/pooling/test_gte.py      | 18 ++++--------------
 tests/models/language/pooling/test_jina.py     | 13 -------------
 .../language/pooling/test_qwen3_reranker.py    |  6 ------
 vllm/config.py                                 |  8 ++++++++
 vllm/model_executor/models/bert_with_rope.py   |  5 +----
 vllm/model_executor/models/config.py           |  2 +-
 vllm/model_executor/models/modernbert.py       |  2 --
 8 files changed, 14 insertions(+), 48 deletions(-)

diff --git a/tests/models/language/pooling/test_classification.py b/tests/models/language/pooling/test_classification.py
index 77df6d16a3673..c71fa96275335 100644
--- a/tests/models/language/pooling/test_classification.py
+++ b/tests/models/language/pooling/test_classification.py
@@ -6,14 +6,6 @@ from transformers import AutoModelForSequenceClassification
 
 from vllm.platforms import current_platform
 
-# TODO: enable when float32 is supported by V1
-# @pytest.fixture(autouse=True)
-# def v1(run_with_both_engines):
-#     # Simple autouse wrapper to run both engines for each test
-#     # This can be promoted up to conftest.py to run for every
-#     # test in a package
-#     pass
-
 
 @pytest.mark.parametrize(
     "model",
diff --git a/tests/models/language/pooling/test_gte.py b/tests/models/language/pooling/test_gte.py
index 0ad54785308e8..6d2eff709961b 100644
--- a/tests/models/language/pooling/test_gte.py
+++ b/tests/models/language/pooling/test_gte.py
@@ -56,17 +56,10 @@ MODELS = [
                    enable_test=False),
 ]
 
-V1FlashAttentionImpNotSupported = [
-    "Alibaba-NLP/gte-Qwen2-1.5B-instruct", "Alibaba-NLP/gte-modernbert-base"
-]
-
 
 @pytest.mark.parametrize("model_info", MODELS)
-def test_embed_models_mteb(hf_runner, vllm_runner, model_info: EmbedModelInfo,
-                           monkeypatch) -> None:
-    if model_info.name in V1FlashAttentionImpNotSupported:
-        monkeypatch.setenv("VLLM_USE_V1", "0")
-
+def test_embed_models_mteb(hf_runner, vllm_runner,
+                           model_info: EmbedModelInfo) -> None:
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
@@ -77,11 +70,8 @@ def test_embed_models_mteb(hf_runner, vllm_runner, model_info: EmbedModelInfo,
 
 @pytest.mark.parametrize("model_info", MODELS)
 def test_embed_models_correctness(hf_runner, vllm_runner,
-                                  model_info: EmbedModelInfo, example_prompts,
-                                  monkeypatch) -> None:
-    if model_info.name in V1FlashAttentionImpNotSupported:
-        monkeypatch.setenv("VLLM_USE_V1", "0")
-
+                                  model_info: EmbedModelInfo,
+                                  example_prompts) -> None:
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
diff --git a/tests/models/language/pooling/test_jina.py b/tests/models/language/pooling/test_jina.py
index 2ae431de16838..59b634428ceff 100644
--- a/tests/models/language/pooling/test_jina.py
+++ b/tests/models/language/pooling/test_jina.py
@@ -4,7 +4,6 @@ from functools import partial
 
 import pytest
 
-import vllm.envs as envs
 from vllm import PoolingParams
 
 from ...utils import EmbedModelInfo, RerankModelInfo
@@ -24,14 +23,6 @@ RERANK_MODELS = [
 ]
 
 
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 @pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
 def test_embed_models_mteb(hf_runner, vllm_runner,
                            model_info: EmbedModelInfo) -> None:
@@ -63,10 +54,6 @@ def test_embed_models_correctness(hf_runner, vllm_runner,
 @pytest.mark.parametrize("model_info", RERANK_MODELS)
 def test_rerank_models_mteb(hf_runner, vllm_runner,
                             model_info: RerankModelInfo) -> None:
-    if (model_info.architecture == "XLMRobertaForSequenceClassification"
-            and envs.VLLM_USE_V1):
-        pytest.skip("Not supported yet")
-
     mteb_test_rerank_models(hf_runner, vllm_runner, model_info)
 
 
diff --git a/tests/models/language/pooling/test_qwen3_reranker.py b/tests/models/language/pooling/test_qwen3_reranker.py
index 9c6a833b41384..68e96f32700ca 100644
--- a/tests/models/language/pooling/test_qwen3_reranker.py
+++ b/tests/models/language/pooling/test_qwen3_reranker.py
@@ -83,9 +83,6 @@ def test_rerank_models_mteb(vllm_runner, model_info: RerankModelInfo) -> None:
         }
     }
 
-    if model_info.name == "Qwen/Qwen3-Reranker-4B":
-        vllm_extra_kwargs["max_num_seqs"] = 1
-
     mteb_test_rerank_models(Qwen3RerankerHfRunner, vllm_runner, model_info,
                             vllm_extra_kwargs)
 
@@ -106,9 +103,6 @@ def test_rerank_models_mteb_tp(vllm_runner,
         "tensor_parallel_size": 2,
     }
 
-    if model_info.name == "Qwen/Qwen3-Reranker-4B":
-        vllm_extra_kwargs["max_num_seqs"] = 1
-
     mteb_test_rerank_models(Qwen3RerankerHfRunner,
                             vllm_runner,
                             model_info,
diff --git a/vllm/config.py b/vllm/config.py
index a330bafb76332..27dde5f1b1f6f 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -776,6 +776,9 @@ class ModelConfig:
             raise ValueError(
                 "`override_neuron_config` is only supported on Neuron.")
 
+        # Avoid running try_verify_and_update_config multiple times
+        self.config_updated = False
+
         self._verify_quantization()
         self._verify_cuda_graph()
         self._verify_bnb_config()
@@ -4914,6 +4917,11 @@ class VllmConfig:
         if self.model_config is None:
             return
 
+        # Avoid running try_verify_and_update_config multiple times
+        if getattr(self.model_config, "config_updated", False):
+            return
+        self.model_config.config_updated = True
+
         architecture = self.model_config.architecture
         if architecture is None:
             return
diff --git a/vllm/model_executor/models/bert_with_rope.py b/vllm/model_executor/models/bert_with_rope.py
index 5249acbd84a56..59033cb74a338 100644
--- a/vllm/model_executor/models/bert_with_rope.py
+++ b/vllm/model_executor/models/bert_with_rope.py
@@ -8,7 +8,6 @@ from torch import nn
 from transformers import PretrainedConfig
 
 from vllm.attention import Attention, AttentionType
-from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, VllmConfig
 from vllm.distributed import (divide, get_tensor_model_parallel_rank,
                               get_tensor_model_parallel_world_size,
@@ -26,7 +25,6 @@ from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     VocabParallelEmbedding)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
-from vllm.model_executor.models import SupportsV0Only
 from vllm.model_executor.models.interfaces import SupportsQuant
 from vllm.model_executor.models.utils import WeightsMapper
 from vllm.model_executor.utils import set_weight_attrs
@@ -360,7 +358,6 @@ class BertWithRopeBlock(nn.Module):
         return hidden_states
 
 
-@support_torch_compile
 class BertWithRopeEncoder(nn.Module):
 
     def __init__(self,
@@ -394,7 +391,7 @@ class BertWithRopeEncoder(nn.Module):
         return hidden_states
 
 
-class BertWithRope(nn.Module, SupportsV0Only, SupportsQuant):
+class BertWithRope(nn.Module, SupportsQuant):
     hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={"model.": ""})
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index 6f50b17530987..9030ff307bee3 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -93,7 +93,7 @@ class NomicBertModelConfig(VerifyAndUpdateConfig):
         config.num_hidden_layers = config.n_layer
 
         head_dim = config.hidden_size // config.num_attention_heads
-        rotary_emb_dim = head_dim * config.rotary_emb_fraction
+        rotary_emb_dim = int(head_dim * config.rotary_emb_fraction)
         max_trained_positions = getattr(config, "max_trained_positions", 2048)
         config.rotary_kwargs = {
             "head_size": head_dim,
diff --git a/vllm/model_executor/models/modernbert.py b/vllm/model_executor/models/modernbert.py
index fc2b0c1f51821..4967032a244ec 100644
--- a/vllm/model_executor/models/modernbert.py
+++ b/vllm/model_executor/models/modernbert.py
@@ -8,7 +8,6 @@ from torch import nn
 from transformers import ModernBertConfig
 
 from vllm.attention import Attention, AttentionType
-from vllm.compilation.decorators import support_torch_compile
 from vllm.config import VllmConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (QKVParallelLinear,
@@ -200,7 +199,6 @@ class ModernBertEncoderLayer(nn.Module):
         return hidden_states
 
 
-@support_torch_compile
 class ModernBertModel(nn.Module):
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_prefix={"layers.": "encoder_layer.layers."})

From 5daffe7cf6db9765bd667d1a2cf5f18843d58fc7 Mon Sep 17 00:00:00 2001
From: Nick Hill <nhill@redhat.com>
Date: Thu, 31 Jul 2025 13:51:37 +0100
Subject: [PATCH 089/224] [BugFix] Fix case where `collective_rpc` returns
 `None` (#22006)

Signed-off-by: Nick Hill <nhill@redhat.com>
---
 tests/v1/engine/test_engine_core_client.py | 13 +++++++++++--
 vllm/v1/serial_utils.py                    | 16 ++++++++++------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py
index f648c38a63f79..1329ce5f69cbd 100644
--- a/tests/v1/engine/test_engine_core_client.py
+++ b/tests/v1/engine/test_engine_core_client.py
@@ -305,10 +305,10 @@ def echo_dc(
     return_list: bool = False,
 ) -> Union[MyDataclass, list[MyDataclass]]:
     print(f"echo dc util function called: {msg}")
+    val = None if msg is None else MyDataclass(msg)
     # Return dataclass to verify support for returning custom types
     # (for which there is special handling to make it work with msgspec).
-    return [MyDataclass(msg) for _ in range(3)] if return_list \
-        else MyDataclass(msg)
+    return [val for _ in range(3)] if return_list else val
 
 
 @pytest.mark.asyncio(loop_scope="function")
@@ -351,6 +351,15 @@ async def test_engine_core_client_util_method_custom_return(
             assert isinstance(result, list) and all(
                 isinstance(r, MyDataclass) and r.message == "testarg2"
                 for r in result)
+
+            # Test returning None and list of Nones
+            result = await core_client.call_utility_async(
+                "echo_dc", None, False)
+            assert result is None
+            result = await core_client.call_utility_async(
+                "echo_dc", None, True)
+            assert isinstance(result, list) and all(r is None for r in result)
+
         finally:
             client.shutdown()
 
diff --git a/vllm/v1/serial_utils.py b/vllm/v1/serial_utils.py
index 4b6a983252b0e..809a60c1962f8 100644
--- a/vllm/v1/serial_utils.py
+++ b/vllm/v1/serial_utils.py
@@ -49,7 +49,10 @@ def _log_insecure_serialization_warning():
                         "VLLM_ALLOW_INSECURE_SERIALIZATION=1")
 
 
-def _typestr(t: type):
+def _typestr(val: Any) -> Optional[tuple[str, str]]:
+    if val is None:
+        return None
+    t = type(val)
     return t.__module__, t.__qualname__
 
 
@@ -131,14 +134,13 @@ class MsgpackEncoder:
 
         if isinstance(obj, UtilityResult):
             result = obj.result
-            if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION or result is None:
+            if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION:
                 return None, result
             # Since utility results are not strongly typed, we also encode
             # the type (or a list of types in the case it's a list) to
             # help with correct msgspec deserialization.
-            cls = result.__class__
-            return _typestr(cls) if cls is not list else [
-                _typestr(type(v)) for v in result
+            return _typestr(result) if type(result) is not list else [
+                _typestr(v) for v in result
             ], result
 
         if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION:
@@ -277,7 +279,9 @@ class MsgpackDecoder:
                 ]
         return UtilityResult(result)
 
-    def _convert_result(self, result_type: Sequence[str], result: Any):
+    def _convert_result(self, result_type: Sequence[str], result: Any) -> Any:
+        if result_type is None:
+            return result
         mod_name, name = result_type
         mod = importlib.import_module(mod_name)
         result_type = getattr(mod, name)

From 207b750e194829c4bcd4df0450f5f93d71755dae Mon Sep 17 00:00:00 2001
From: amirkl94 <203507526+amirkl94@users.noreply.github.com>
Date: Thu, 31 Jul 2025 16:00:01 +0300
Subject: [PATCH 090/224] [NVIDIA] Add SM100 Flashinfer MoE per tensor scale
 fp8 backend (#21458)

Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
---
 .../layers/fused_moe/fused_moe.py             | 113 +++++++++++++++---
 .../model_executor/layers/quantization/fp8.py |  75 +++++++-----
 .../layers/quantization/modelopt.py           |  28 +++++
 .../quantization/utils/flashinfer_utils.py    | 100 ++++++++++++++++
 vllm/utils/flashinfer.py                      |   2 +
 5 files changed, 269 insertions(+), 49 deletions(-)
 create mode 100644 vllm/model_executor/layers/quantization/utils/flashinfer_utils.py

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 227aacf25c0b0..b69575c7e96de 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -30,6 +30,8 @@ from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
     TopKWeightAndReduceNoOP)
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache, moe_kernel_quantize_input, per_token_group_quant_fp8)
+from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
+    calculate_tile_tokens_dim)
 from vllm.model_executor.layers.quantization.utils.mxfp4_utils import (
     dequant_mxfp4)
 from vllm.platforms import current_platform
@@ -1065,22 +1067,6 @@ direct_register_custom_op(
 )
 
 
-def next_positive_power_of_2(x: int) -> int:
-    if x < 1:
-        return 1
-    return 1 << (x - 1).bit_length()
-
-
-def _get_tile_tokens_dim(num_tokens, top_k, num_experts):
-    # Guess tokens per expert assuming perfect expert distribution first.
-    num_tokens_per_expert = (num_tokens * top_k) // num_experts
-    # And pad the number to the next power of 2.
-    tile_tokens_dim = next_positive_power_of_2(num_tokens_per_expert)
-    # Cap to 8-64 tokens per CTA tile as it's the range supported by the kernel.
-    tile_tokens_dim = min(max(tile_tokens_dim, 8), 64)
-    return tile_tokens_dim
-
-
 def flashinfer_fused_moe_blockscale_fp8(
         routing_logits: torch.Tensor,
         routing_bias: torch.Tensor,
@@ -1128,8 +1114,8 @@ def flashinfer_fused_moe_blockscale_fp8(
         local_expert_offset=expert_offset,
         local_num_experts=local_num_experts,
         routed_scaling_factor=routed_scaling,
-        tile_tokens_dim=_get_tile_tokens_dim(x.shape[0], top_k,
-                                             global_num_experts),
+        tile_tokens_dim=calculate_tile_tokens_dim(x.shape[0], top_k,
+                                                  global_num_experts),
         routing_method_type=2,  # DeepSeek-styled routing method
         use_shuffled_weight=False,
     )
@@ -1164,6 +1150,97 @@ direct_register_custom_op(
 )
 
 
+def flashinfer_fused_moe_per_tensor_scale_fp8(
+        routing_logits: torch.Tensor,
+        routing_bias: Optional[torch.Tensor],
+        hidden_states: torch.Tensor,
+        input_scale: torch.Tensor,
+        gemm1_weights: torch.Tensor,
+        gemm1_weights_scale: torch.Tensor,
+        activation_scale: torch.Tensor,
+        gemm2_weights: torch.Tensor,
+        gemm2_weights_scale: torch.Tensor,
+        num_experts: int,
+        top_k: int,
+        num_expert_group: Optional[int],
+        topk_group: Optional[int],
+        intermediate_size: int,
+        local_expert_offset: int,
+        local_num_experts: int,
+        use_routing_scales_on_input: bool,
+        routing_method_type: int,
+        routed_scaling_factor: float = 1.0) -> torch.Tensor:
+    num_expert_group = num_expert_group if num_expert_group is not None else 0
+    topk_group = topk_group if topk_group is not None else 0
+
+    quant_hidden_states, input_scale = moe_kernel_quantize_input(
+        hidden_states,
+        input_scale,
+        quant_dtype=torch.float8_e4m3fn,
+        per_act_token_quant=False)
+
+    output1_scales_scalar = gemm1_weights_scale * input_scale * (
+        1.0 / activation_scale)
+    output1_scales_gate_scalar = gemm1_weights_scale * input_scale
+    output2_scales_scalar = activation_scale * gemm2_weights_scale
+
+    from vllm.utils.flashinfer import (
+        flashinfer_trtllm_fp8_per_tensor_scale_moe)
+    return flashinfer_trtllm_fp8_per_tensor_scale_moe(
+        routing_logits=routing_logits,
+        routing_bias=routing_bias,
+        hidden_states=quant_hidden_states,
+        gemm1_weights=gemm1_weights,
+        output1_scales_scalar=output1_scales_scalar,
+        output1_scales_gate_scalar=output1_scales_gate_scalar,
+        gemm2_weights=gemm2_weights,
+        output2_scales_scalar=output2_scales_scalar,
+        num_experts=num_experts,
+        top_k=top_k,
+        n_group=num_expert_group,
+        topk_group=topk_group,
+        intermediate_size=intermediate_size,
+        local_expert_offset=local_expert_offset,
+        local_num_experts=local_num_experts,
+        routed_scaling_factor=routed_scaling_factor,
+        use_routing_scales_on_input=use_routing_scales_on_input,
+        tile_tokens_dim=calculate_tile_tokens_dim(hidden_states.shape[0],
+                                                  top_k, num_experts),
+        routing_method_type=routing_method_type)
+
+
+def flashinfer_fused_moe_per_tensor_scale_fp8_fake(
+        routing_logits: torch.Tensor,
+        routing_bias: torch.Tensor,
+        hidden_states: torch.Tensor,
+        gemm1_weights: torch.Tensor,
+        output1_scales_scalar: torch.Tensor,
+        output1_scales_gate_scalar: torch.Tensor,
+        gemm2_weights: torch.Tensor,
+        output2_scales_scalar: torch.Tensor,
+        num_experts: int,
+        top_k: int,
+        num_expert_group: int,
+        topk_group: int,
+        intermediate_size: int,
+        local_expert_offset: int,
+        local_num_experts: int,
+        routed_scaling_factor: float = 1.0,
+        use_routing_scales_on_input: bool = False,
+        tile_tokens_dim: int = 8,
+        routing_method_type: int = 0) -> torch.Tensor:
+    pass
+
+
+direct_register_custom_op(
+    op_name="flashinfer_fused_moe_per_tensor_scale_fp8",
+    op_func=flashinfer_fused_moe_per_tensor_scale_fp8,
+    mutates_args=["hidden_states"],
+    fake_impl=flashinfer_fused_moe_per_tensor_scale_fp8_fake,
+    tags=(torch.Tag.needs_fixed_stride_order, ),
+)
+
+
 def outplace_fused_experts(
         hidden_states: torch.Tensor,
         w1: torch.Tensor,
diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 75f8adf34f7dd..8b6ed154bdbe4 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -23,6 +23,9 @@ from vllm.model_executor.layers.quantization import QuantizationMethods
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig, QuantizeMethodBase)
 from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
+from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
+    apply_flashinfer_per_tensor_scale_fp8, rotate_flashinfer_fp8_moe_weights,
+    swap_w13_to_w31)
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     get_col_major_tma_aligned_tensor, requant_weight_ue8m0_inplace)
 from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
@@ -53,11 +56,6 @@ ACTIVATION_SCHEMES = ["static", "dynamic"]
 logger = init_logger(__name__)
 
 
-def _swap_w13_to_w31(x: torch.Tensor) -> torch.Tensor:
-    return x.reshape(-1, 2, x.shape[-2] // 2,
-                     x.shape[-1]).flip(dims=[1]).reshape(x.shape)
-
-
 def _is_col_major(x: torch.Tensor) -> bool:
     assert x.dim() == 3
     b, m, n = x.shape
@@ -695,11 +693,13 @@ class Fp8MoEMethod(FusedMoEMethodBase):
             elif self.flashinfer_moe_enabled:
                 # NOTE: weights have to be swapped since the activation is
                 # applied on different half for flashinfer vs vllm
-                w13_weight = _swap_w13_to_w31(layer.w13_weight.data)
-                w13_weight_scale_inv = _swap_w13_to_w31(
+                w13_weight = swap_w13_to_w31(layer.w13_weight.data)
+                w13_weight_scale_inv = swap_w13_to_w31(
                     layer.w13_weight_scale_inv.data)
                 w2_weight = layer.w2_weight.data
                 w2_weight_scale_inv = layer.w2_weight_scale_inv.data
+                if not self.block_quant:
+                    rotate_flashinfer_fp8_moe_weights(w13_weight, w2_weight)
             else:
                 w13_weight = layer.w13_weight.data
                 w13_weight_scale_inv = layer.w13_weight_scale_inv.data
@@ -998,30 +998,43 @@ class Fp8MoEMethod(FusedMoEMethodBase):
                 global_num_experts=global_num_experts,
                 expert_map=expert_map)
         elif self.flashinfer_moe_enabled:
-            # Currently only work with DS models
-            assert self.block_quant
-            assert (renormalize and use_grouped_topk
-                    and scoring_func == 'sigmoid'
-                    and custom_routing_function is None)
-            assert activation == "silu"
-            return torch.ops.vllm.flashinfer_fused_moe_blockscale_fp8(
-                routing_logits=router_logits.to(torch.float32),
-                routing_bias=e_score_correction_bias,
-                x=x,
-                w13_weight=layer.w13_weight,
-                w13_weight_scale_inv=layer.w13_weight_scale_inv,
-                w2_weight=layer.w2_weight,
-                w2_weight_scale_inv=layer.w2_weight_scale_inv,
-                global_num_experts=global_num_experts,
-                top_k=top_k,
-                num_expert_group=num_expert_group,
-                topk_group=topk_group,
-                intermediate_size=layer.intermediate_size_per_partition,
-                expert_offset=layer.ep_rank * layer.local_num_experts,
-                local_num_experts=layer.local_num_experts,
-                block_shape=self.quant_config.weight_block_size,
-                routed_scaling=1.0,
-            )
+            assert activation == 'silu'
+            assert scoring_func == 'sigmoid'
+            if self.block_quant:
+                assert (renormalize and use_grouped_topk
+                        and custom_routing_function is None)
+
+                return torch.ops.vllm.flashinfer_fused_moe_blockscale_fp8(
+                    routing_logits=router_logits.to(torch.float32),
+                    routing_bias=e_score_correction_bias,
+                    x=x,
+                    w13_weight=layer.w13_weight,
+                    w13_weight_scale_inv=layer.w13_weight_scale_inv,
+                    w2_weight=layer.w2_weight,
+                    w2_weight_scale_inv=layer.w2_weight_scale_inv,
+                    global_num_experts=global_num_experts,
+                    top_k=top_k,
+                    num_expert_group=num_expert_group,
+                    topk_group=topk_group,
+                    intermediate_size=layer.intermediate_size_per_partition,
+                    expert_offset=layer.ep_rank * layer.local_num_experts,
+                    local_num_experts=layer.local_num_experts,
+                    block_shape=self.quant_config.weight_block_size,
+                    routed_scaling=1.0,
+                )
+            else:
+                assert (not renormalize
+                        and custom_routing_function is not None)
+                return apply_flashinfer_per_tensor_scale_fp8(
+                    layer=layer,
+                    hidden_states=x,
+                    router_logits=router_logits,
+                    routing_bias=e_score_correction_bias,
+                    global_num_experts=global_num_experts,
+                    top_k=top_k,
+                    num_expert_group=num_expert_group,
+                    topk_group=topk_group,
+                    apply_router_weight_on_input=apply_router_weight_on_input)
         else:
             return self.fused_experts(
                 hidden_states=x,
diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py
index 8fbc3231d86c3..b8ffcf90c022b 100644
--- a/vllm/model_executor/layers/quantization/modelopt.py
+++ b/vllm/model_executor/layers/quantization/modelopt.py
@@ -23,6 +23,9 @@ from vllm.model_executor.layers.quantization import QuantizationMethods
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig, QuantizeMethodBase)
 from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
+from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
+    apply_flashinfer_per_tensor_scale_fp8, rotate_flashinfer_fp8_moe_weights,
+    swap_w13_to_w31)
 from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
     apply_fp4_marlin_linear, is_fp4_marlin_supported,
     prepare_fp4_layer_for_marlin, prepare_moe_fp4_layer_for_marlin)
@@ -34,6 +37,7 @@ from vllm.model_executor.parameter import (ModelWeightParameter,
                                            PerTensorScaleParameter)
 from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
+from vllm.utils.flashinfer import has_flashinfer_moe
 
 logger = init_logger(__name__)
 
@@ -267,6 +271,11 @@ class ModelOptFp8MoEMethod(FusedMoEMethodBase):
         from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
             cutlass_fp8_supported)
         self.cutlass_fp8_supported = cutlass_fp8_supported()
+        self.flashinfer_moe_enabled = False
+        if envs.VLLM_USE_FLASHINFER_MOE_FP8 and has_flashinfer_moe():
+            logger.info_once(
+                "Using FlashInfer MoE FP8 kernels for ModelOptFp8MoEMethod.")
+            self.flashinfer_moe_enabled = True
 
     def create_weights(
         self,
@@ -410,6 +419,11 @@ class ModelOptFp8MoEMethod(FusedMoEMethodBase):
             layer.w2_input_scale = Parameter(layer.w2_input_scale.max(),
                                              requires_grad=False)
 
+        if self.flashinfer_moe_enabled:
+            layer.w13_weight.data = swap_w13_to_w31(layer.w13_weight.data)
+            rotate_flashinfer_fp8_moe_weights(layer.w13_weight,
+                                              layer.w2_weight)
+
     def apply(
         self,
         layer: torch.nn.Module,
@@ -436,6 +450,20 @@ class ModelOptFp8MoEMethod(FusedMoEMethodBase):
             raise NotImplementedError(
                 "EPLB not supported for `ModelOptFp8MoEMethod` yet.")
 
+        if self.flashinfer_moe_enabled:
+            assert activation == 'silu'
+            assert not renormalize
+            return apply_flashinfer_per_tensor_scale_fp8(
+                layer=layer,
+                hidden_states=x,
+                router_logits=router_logits,
+                routing_bias=e_score_correction_bias,
+                global_num_experts=global_num_experts,
+                top_k=top_k,
+                num_expert_group=num_expert_group,
+                topk_group=topk_group,
+                apply_router_weight_on_input=apply_router_weight_on_input)
+
         # Expert selection
         topk_weights, topk_ids = FusedMoE.select_experts(
             hidden_states=x,
diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
new file mode 100644
index 0000000000000..c6f914febc0a2
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Optional
+
+import torch
+
+
+def calculate_tile_tokens_dim(num_tokens, top_k, num_experts):
+    from flashinfer import next_positive_power_of_2
+
+    # Guess tokens per expert assuming perfect expert distribution first.
+    num_tokens_per_expert = (num_tokens * top_k) // num_experts
+    # And pad the number to the next power of 2.
+    tile_tokens_dim = next_positive_power_of_2(num_tokens_per_expert)
+    # Cap to 8-64 tokens per CTA tile as it's the range supported by the kernel.
+    tile_tokens_dim = min(max(tile_tokens_dim, 8), 64)
+    return tile_tokens_dim
+
+
+def swap_w13_to_w31(x: torch.Tensor) -> torch.Tensor:
+    return x.reshape(-1, 2, x.shape[-2] // 2,
+                     x.shape[-1]).flip(dims=[1]).reshape(x.shape)
+
+
+def rotate_flashinfer_fp8_moe_weights(gemm1_weights: torch.Tensor,
+                                      gemm2_weights: torch.Tensor):
+    from flashinfer import reorder_rows_for_gated_act_gemm, shuffle_matrix_a
+    epilogue_tile_m = 128
+    num_experts = gemm1_weights.shape[0]
+    hidden_size = gemm1_weights.shape[-1]
+    intermediate_size = gemm1_weights.shape[1] // 2
+
+    # Reorder rows of W1 for fused gated activation
+    gemm1_weights_fp8_interleaved = []
+    for i in range(num_experts):
+        gemm1_weights_fp8_interleaved.append(
+            reorder_rows_for_gated_act_gemm(gemm1_weights[i]))
+
+    # Stack the interleaved weights for all experts
+    gemm1_weights_fp8_interleaved = torch.stack(
+        gemm1_weights_fp8_interleaved).reshape(num_experts,
+                                               2 * intermediate_size,
+                                               hidden_size)
+
+    # Shuffle weights for transposed mma output
+    gemm1_weights_fp8_shuffled = []
+    gemm2_weights_fp8_shuffled = []
+    for i in range(num_experts):
+        gemm1_weights_fp8_shuffled.append(
+            shuffle_matrix_a(
+                gemm1_weights_fp8_interleaved[i].view(torch.uint8),
+                epilogue_tile_m))
+
+        gemm2_weights_fp8_shuffled.append(
+            shuffle_matrix_a(gemm2_weights[i].view(torch.uint8),
+                             epilogue_tile_m))
+
+    # Stack weights for all experts
+    gemm1_weights.data = torch.stack(gemm1_weights_fp8_shuffled).view(
+        torch.float8_e4m3fn)
+    gemm2_weights.data = torch.stack(gemm2_weights_fp8_shuffled).view(
+        torch.float8_e4m3fn)
+
+
+def apply_flashinfer_per_tensor_scale_fp8(
+    layer: torch.nn.Module,
+    hidden_states: torch.Tensor,
+    router_logits: torch.Tensor,
+    routing_bias: Optional[torch.Tensor],
+    top_k: int,
+    num_expert_group: Optional[int],
+    topk_group: Optional[int],
+    global_num_experts: int,
+    apply_router_weight_on_input: bool,
+) -> torch.Tensor:
+    from flashinfer.fused_moe import RoutingMethodType
+
+    from vllm.model_executor.models.llama4 import Llama4MoE
+    assert layer.custom_routing_function == Llama4MoE.custom_routing_function, \
+        "FusedMoE flashinfer kernels are only supported for Llama4"
+    return torch.ops.vllm.flashinfer_fused_moe_per_tensor_scale_fp8(
+        routing_logits=router_logits,
+        routing_bias=routing_bias,
+        hidden_states=hidden_states,
+        input_scale=layer.w13_input_scale,
+        gemm1_weights=layer.w13_weight,
+        gemm1_weights_scale=layer.w13_weight_scale,
+        gemm2_weights=layer.w2_weight,
+        gemm2_weights_scale=layer.w2_weight_scale,
+        activation_scale=layer.w2_input_scale,
+        num_experts=global_num_experts,
+        top_k=top_k,
+        num_expert_group=num_expert_group,
+        topk_group=topk_group,
+        intermediate_size=layer.intermediate_size_per_partition,
+        local_expert_offset=layer.ep_rank * layer.local_num_experts,
+        local_num_experts=layer.local_num_experts,
+        use_routing_scales_on_input=apply_router_weight_on_input,
+        routing_method_type=RoutingMethodType.Llama4,
+    )
\ No newline at end of file
diff --git a/vllm/utils/flashinfer.py b/vllm/utils/flashinfer.py
index ebc54fd029da6..3bfb9808c0a00 100644
--- a/vllm/utils/flashinfer.py
+++ b/vllm/utils/flashinfer.py
@@ -66,6 +66,8 @@ def _lazy_import_wrapper(module_name: str,
 # Create lazy wrappers for each function
 flashinfer_trtllm_fp8_block_scale_moe = _lazy_import_wrapper(
     "flashinfer.fused_moe", "trtllm_fp8_block_scale_moe")
+flashinfer_trtllm_fp8_per_tensor_scale_moe = _lazy_import_wrapper(
+    "flashinfer.fused_moe", "trtllm_fp8_per_tensor_scale_moe")
 flashinfer_cutlass_fused_moe = _lazy_import_wrapper("flashinfer.fused_moe",
                                                     "cutlass_fused_moe")
 fp4_quantize = _lazy_import_wrapper("flashinfer", "fp4_quantize")

From 94846416166c731939892350d7ab26dcbcb2982d Mon Sep 17 00:00:00 2001
From: Song <44120206+Oliver-ss@users.noreply.github.com>
Date: Thu, 31 Jul 2025 23:19:06 +0800
Subject: [PATCH 091/224] [Model] Add step3 vl (#21998)

Signed-off-by: oliveryuan <yuansong@step.ai>
Co-authored-by: oliveryuan <yuansong@step.ai>
---
 docs/models/supported_models.md               |    1 +
 tests/models/registry.py                      |    6 +
 .../openai/tool_parsers/__init__.py           |    2 +
 .../openai/tool_parsers/step3_tool_parser.py  |  296 +++++
 vllm/model_executor/models/registry.py        |    2 +
 vllm/model_executor/models/step3_text.py      |  521 ++++++++
 vllm/model_executor/models/step3_vl.py        | 1052 +++++++++++++++++
 vllm/reasoning/__init__.py                    |    2 +
 vllm/reasoning/step3_reasoning_parser.py      |  109 ++
 vllm/transformers_utils/config.py             |    5 +-
 vllm/transformers_utils/configs/__init__.py   |    6 +
 vllm/transformers_utils/configs/step3_vl.py   |  123 ++
 12 files changed, 2124 insertions(+), 1 deletion(-)
 create mode 100644 vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
 create mode 100644 vllm/model_executor/models/step3_text.py
 create mode 100644 vllm/model_executor/models/step3_vl.py
 create mode 100644 vllm/reasoning/step3_reasoning_parser.py
 create mode 100644 vllm/transformers_utils/configs/step3_vl.py

diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 5a9823bb6bae7..f5d9e3b22f2a6 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -625,6 +625,7 @@ See [this page](generative_models.md) for more information on how to use generat
 | `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `Qwen/Qwen2.5-Omni-7B` | | ✅︎ | ✅︎ |
 | `SkyworkR1VChatModel` | Skywork-R1V-38B | T + I | `Skywork/Skywork-R1V-38B` | | ✅︎ | ✅︎ |
 | `SmolVLMForConditionalGeneration` | SmolVLM2 | T + I | `SmolVLM2-2.2B-Instruct` | ✅︎ | | ✅︎ |
+| `Step3VLForConditionalGeneration` | Step3-VL | T + I<sup>+</sup> | `stepfun-ai/step3` | | ✅︎ | ✅︎ |
 | `TarsierForConditionalGeneration` | Tarsier | T + I<sup>E+</sup> | `omni-search/Tarsier-7b`, `omni-search/Tarsier-34b` | | ✅︎ | ✅︎ |
 | `Tarsier2ForConditionalGeneration`<sup>^</sup> | Tarsier2 | T + I<sup>E+</sup> + V<sup>E+</sup> | `omni-research/Tarsier2-Recap-7b`, `omni-research/Tarsier2-7b-0115` | | ✅︎ | ✅︎ |
 
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 8fcff5a8c5113..b9e7de4e9fd11 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -279,6 +279,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
     "StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),  # noqa: E501
     "StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
     "Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
+    "Step3TextForCausalLM": _HfExamplesInfo("stepfun-ai/step3",
+                                            trust_remote_code=True,
+                                            is_available_online=False),
     "SolarForCausalLM": _HfExamplesInfo("upstage/solar-pro-preview-instruct",
                                         trust_remote_code=True),
     "TeleChat2ForCausalLM": _HfExamplesInfo("Tele-AI/TeleChat2-3B",
@@ -457,6 +460,9 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B",
                                            trust_remote_code=True),
     "SmolVLMForConditionalGeneration": _HfExamplesInfo("HuggingFaceTB/SmolVLM2-2.2B-Instruct"),  # noqa: E501
+    "Step3VLForConditionalGeneration": _HfExamplesInfo("stepfun-ai/step3",
+                                                        trust_remote_code=True,
+                                                        is_available_online=False),
     "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b",  # noqa: E501
                                      trust_remote_code=True),
     "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b",  # noqa: E501
diff --git a/vllm/entrypoints/openai/tool_parsers/__init__.py b/vllm/entrypoints/openai/tool_parsers/__init__.py
index 88c8aa929b78d..099e456aa486f 100644
--- a/vllm/entrypoints/openai/tool_parsers/__init__.py
+++ b/vllm/entrypoints/openai/tool_parsers/__init__.py
@@ -18,6 +18,7 @@ from .mistral_tool_parser import MistralToolParser
 from .phi4mini_tool_parser import Phi4MiniJsonToolParser
 from .pythonic_tool_parser import PythonicToolParser
 from .qwen3coder_tool_parser import Qwen3CoderToolParser
+from .step3_tool_parser import Step3ToolParser
 from .xlam_tool_parser import xLAMToolParser
 
 __all__ = [
@@ -40,4 +41,5 @@ __all__ = [
     "HunyuanA13BToolParser",
     "Glm4MoeModelToolParser",
     "Qwen3CoderToolParser",
+    "Step3ToolParser",
 ]
diff --git a/vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
new file mode 100644
index 0000000000000..a20d18eb52544
--- /dev/null
+++ b/vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
@@ -0,0 +1,296 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import contextlib
+import json
+from collections.abc import Sequence
+from typing import Any, Optional, Union
+
+import regex as re
+
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ToolCall)
+from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
+    ToolParser, ToolParserManager)
+from vllm.logger import init_logger
+from vllm.transformers_utils.tokenizer import AnyTokenizer
+from vllm.utils import random_uuid
+
+logger = init_logger(__name__)
+
+
+@ToolParserManager.register_module(["step3"])
+class Step3ToolParser(ToolParser):
+    """
+    Tool parser for a model that uses a specific XML-like format for tool calls.
+    This version uses a robust, stateful, cursor-based streaming parser and
+    consolidates tool arguments into a single message.
+    """
+
+    TOOL_CALLS_BEGIN = "<｜tool_calls_begin｜>"
+    TOOL_CALLS_END = "<｜tool_calls_end｜>"
+    TOOL_CALL_BEGIN = "<｜tool_call_begin｜>"
+    TOOL_CALL_END = "<｜tool_call_end｜>"
+    TOOL_SEP = "<｜tool_sep｜>"
+    SPECIAL_TOKENS = [
+        TOOL_CALLS_BEGIN, TOOL_CALLS_END, TOOL_CALL_BEGIN, TOOL_CALL_END
+    ]
+
+    def __init__(self, tokenizer: AnyTokenizer):
+        super().__init__(tokenizer)
+        self.position = 0
+        # Explicit state flags for robust streaming
+        self.tool_block_started = False
+        self.tool_block_finished = False
+
+    def adjust_request(
+            self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+        if request.tools and request.tool_choice != 'none':
+            request.skip_special_tokens = False
+        return request
+
+    @staticmethod
+    def _parse_steptml_invoke(
+            action_text: str
+    ) -> tuple[Optional[str], Optional[dict[str, str]]]:
+        func_name_match = re.search(r'<steptml:invoke name="([^"]+)">',
+                                    action_text)
+        if not func_name_match:
+            return None, None
+        func_name = func_name_match.group(1)
+
+        params: dict[str, str] = {}
+        param_matches = re.findall(
+            r'<steptml:parameter name="([^"]+)">([^<]*)</steptml:parameter>',
+            action_text)
+        for name, value in param_matches:
+            params[name] = value.strip()
+        return func_name, params
+
+    def _cast_arguments(
+        self,
+        func_name: str,
+        params: dict[str, Any],
+        request: ChatCompletionRequest,
+    ) -> dict[str, Any]:
+        for tool in request.tools or []:
+            if tool.function.name == func_name:
+                schema = tool.function.parameters or {}
+                properties = schema.get("properties", {})
+                for key, value in params.items():
+                    if not isinstance(value, str):
+                        continue
+                    prop = properties.get(key, {})
+                    typ = prop.get("type")
+                    if typ == "string":
+                        params[key] = value.strip()
+                    elif typ == "integer":
+                        with contextlib.suppress(ValueError):
+                            params[key] = int(value)
+                    elif typ == "number":
+                        with contextlib.suppress(ValueError):
+                            params[key] = float(value)
+                    elif typ == "boolean":
+                        lower_val = value.lower()
+                        params[key] = lower_val == "true" if lower_val in (
+                            "true", "false") else value
+                    elif typ == "null":
+                        params[key] = None if value.lower(
+                        ) == "null" else value
+                break
+        return params
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> Union[DeltaMessage, None]:
+
+        # The main loop processes the stream from the last known position.
+        while True:
+            if self.position >= len(current_text):
+                return None  # We've processed the entire stream.
+
+            unprocessed_text = current_text[self.position:]
+
+            # STATE: After all tools are done, all subsequent text is content.
+            if self.tool_block_finished:
+                self.position = len(current_text)
+                return DeltaMessage(content=unprocessed_text)
+
+            # STATE: Before the tool block has started.
+            if not self.tool_block_started:
+                if unprocessed_text.startswith(self.TOOL_CALLS_BEGIN):
+                    self.position += len(self.TOOL_CALLS_BEGIN)
+                    self.tool_block_started = True
+                    continue  # Token consumed, re-loop.
+
+                start_pos = unprocessed_text.find(self.TOOL_CALLS_BEGIN)
+                if start_pos == -1:
+                    if self.TOOL_CALLS_BEGIN.startswith(
+                            unprocessed_text.strip()) and unprocessed_text:
+                        return None  # It's a prefix, wait.
+                    self.position = len(current_text)
+                    return DeltaMessage(content=unprocessed_text)
+                else:
+                    content = unprocessed_text[:start_pos]
+                    self.position += len(content)
+                    return DeltaMessage(content=content)
+
+            # STATE: Inside the main tool block.
+            offset = len(unprocessed_text) - len(unprocessed_text.lstrip())
+            unprocessed_text = unprocessed_text.lstrip()
+            self.position += offset
+
+            if unprocessed_text.startswith(self.TOOL_CALLS_END):
+                self.position += len(self.TOOL_CALLS_END)
+                self.tool_block_finished = True
+                self.current_tool_id = -1
+                continue
+
+            # Check if we are between tool calls.
+            tool_finished = (
+                self.current_tool_id != -1 and
+                self.prev_tool_call_arr[self.current_tool_id].get("finished"))
+            if self.current_tool_id == -1 or tool_finished:
+                if unprocessed_text.startswith(self.TOOL_CALL_BEGIN):
+                    self.position += len(self.TOOL_CALL_BEGIN)
+                    if self.current_tool_id == -1:
+                        self.current_tool_id = 0
+                    else:
+                        self.current_tool_id += 1
+                    self.current_tool_name_sent = False
+                    while len(self.prev_tool_call_arr) <= self.current_tool_id:
+                        self.prev_tool_call_arr.append({})
+                    self.prev_tool_call_arr[
+                        self.current_tool_id]["finished"] = False
+                    continue
+
+                if self.TOOL_CALL_BEGIN.startswith(unprocessed_text):
+                    return None
+
+            # STATE: Parsing an active tool call.
+            if self.current_tool_id != -1 and not self.prev_tool_call_arr[
+                    self.current_tool_id].get("finished", False):
+                end_tool_pos = unprocessed_text.find(self.TOOL_CALL_END)
+                if end_tool_pos == -1:
+                    tool_body = unprocessed_text
+                else:
+                    tool_body = unprocessed_text[:end_tool_pos]
+
+                if end_tool_pos == -1 and self.TOOL_CALL_END.startswith(
+                        tool_body):
+                    return None
+
+                function_name, arguments = self._parse_steptml_invoke(
+                    tool_body)
+                if not function_name:
+                    return None
+
+                tool_call_arr = {
+                    "name": function_name,
+                    "parameters": arguments or {}
+                }
+
+                # Send the function name as soon as it's parsed.
+                if not self.current_tool_name_sent:
+                    self.current_tool_name_sent = True
+                    self.prev_tool_call_arr[self.current_tool_id].update(
+                        tool_call_arr)
+                    return DeltaMessage(tool_calls=[
+                        DeltaToolCall(index=self.current_tool_id,
+                                      type="function",
+                                      id=f"chatcmpl-tool-{random_uuid()}",
+                                      function=DeltaFunctionCall(
+                                          name=function_name))
+                    ])
+
+                # Update our internal state with the latest parsed arguments.
+                self.prev_tool_call_arr[
+                    self.current_tool_id].update(  # noqa: E501
+                        tool_call_arr)
+
+                # Only send arguments when the tool call is complete.
+                if end_tool_pos != -1:
+                    self.position += end_tool_pos + len(self.TOOL_CALL_END)
+                    self.prev_tool_call_arr[
+                        self.current_tool_id]["finished"] = True
+
+                    final_args = self._cast_arguments(
+                        function_name,
+                        tool_call_arr.get("parameters", {}),  # type: ignore
+                        request)
+                    if final_args:
+                        final_args_json = json.dumps(final_args,
+                                                     ensure_ascii=False)
+                        return DeltaMessage(tool_calls=[
+                            DeltaToolCall(index=self.current_tool_id,
+                                          function=DeltaFunctionCall(
+                                              arguments=final_args_json))
+                        ])
+
+                # If tool is not finished, return None to wait for more tokens.
+                return None
+
+            return None
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        if self.TOOL_CALLS_BEGIN not in model_output:
+            return ExtractedToolCallInformation(tools_called=False,
+                                                tool_calls=[],
+                                                content=model_output)
+
+        pre_text, rest = model_output.split(self.TOOL_CALLS_BEGIN, 1)
+        if self.TOOL_CALLS_END not in rest:
+            return ExtractedToolCallInformation(tools_called=False,
+                                                tool_calls=[],
+                                                content=model_output)
+
+        tool_block, post_text = rest.split(self.TOOL_CALLS_END, 1)
+        content = (pre_text + post_text).strip()
+
+        tool_calls: list[ToolCall] = []
+        call_parts = tool_block.split(self.TOOL_CALL_BEGIN)
+
+        for part in call_parts:
+            if not part or self.TOOL_CALL_END not in part:
+                continue
+
+            call_content = part.split(self.TOOL_CALL_END, 1)[0]
+            if self.TOOL_SEP not in call_content:
+                continue
+
+            type_part, invoke_part = call_content.split(self.TOOL_SEP, 1)
+            if type_part.strip() != "function":
+                continue
+
+            function_name, params_dict = self._parse_steptml_invoke(
+                invoke_part)
+
+            if function_name and params_dict is not None:
+                params_dict = self._cast_arguments(function_name, params_dict,
+                                                   request)
+                params_str = json.dumps(params_dict, ensure_ascii=False)
+                tool_calls.append(
+                    ToolCall(function=FunctionCall(name=function_name,
+                                                   arguments=params_str)))
+        if tool_calls:
+            return ExtractedToolCallInformation(
+                tools_called=True,
+                tool_calls=tool_calls,
+                content=content if content else None)
+        return ExtractedToolCallInformation(tools_called=False,
+                                            tool_calls=[],
+                                            content=model_output)
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 51831a770347a..848c04b9b32f7 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -129,6 +129,7 @@ _TEXT_GENERATION_MODELS = {
     "Qwen3ForCausalLM": ("qwen3", "Qwen3ForCausalLM"),
     "Qwen3MoeForCausalLM": ("qwen3_moe", "Qwen3MoeForCausalLM"),
     "RWForCausalLM": ("falcon", "FalconForCausalLM"),
+    "Step3TextForCausalLM": ("step3_text", "Step3TextForCausalLM"),
     "StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"),
     "StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"),
     "Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"),
@@ -238,6 +239,7 @@ _MULTIMODAL_MODELS = {
     "Qwen2_5OmniModel": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"),  # noqa: E501
     "Qwen2_5OmniForConditionalGeneration": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"),  # noqa: E501
     "UltravoxModel": ("ultravox", "UltravoxModel"),
+    "Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"),  # noqa: E501
     "TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"),  # noqa: E501
     "Tarsier2ForConditionalGeneration": ("qwen2_vl", "Tarsier2ForConditionalGeneration"),  # noqa: E501
     "VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"),  # noqa: E501
diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py
new file mode 100644
index 0000000000000..47d2af5c2a140
--- /dev/null
+++ b/vllm/model_executor/models/step3_text.py
@@ -0,0 +1,521 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Inference-only Step3 text model."""
+from collections.abc import Iterable
+from typing import Any, Optional
+
+import torch
+from torch import nn
+
+from vllm.attention import Attention
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, ModelConfig, VllmConfig
+from vllm.distributed import (get_pp_group,
+                              get_tensor_model_parallel_world_size,
+                              tensor_model_parallel_all_reduce)
+from vllm.logger import init_logger
+from vllm.model_executor.layers.activation import SiluAndMul
+from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (ColumnParallelLinear,
+                                               MergedColumnParallelLinear,
+                                               ReplicatedLinear,
+                                               RowParallelLinear)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization.base_config import (
+    QuantizationConfig)
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.sampling_metadata import SamplingMetadata
+from vllm.sequence import IntermediateTensors
+
+from .interfaces import SupportsPP
+from .utils import (PPMissingLayer, is_pp_missing_parameter,
+                    make_empty_intermediate_tensors_factory, make_layers)
+
+logger = init_logger(__name__)
+
+
+class FusedMoEBlock(nn.Module):
+    """Routed MoE block: replicated router gate feeding a FusedMoE layer."""
+    def __init__(self,
+                 config: ModelConfig,
+                 quant_config: Optional[QuantizationConfig] = None,
+                 prefix: str = ""):
+        super().__init__()
+        self.tp_size = get_tensor_model_parallel_world_size()
+        # NOTE: callers in this file pass the HF config, not ModelConfig.
+        if self.tp_size > config.moe_num_experts:
+            raise ValueError(
+                f"Tensor parallel size {self.tp_size} is greater than "
+                f"the number of experts {config.moe_num_experts}.")
+        # Experts do not reduce (reduce_results=False); forward() all-reduces.
+        self.experts = FusedMoE(num_experts=config.moe_num_experts,
+                                top_k=config.moe_top_k,
+                                hidden_size=config.hidden_size,
+                                intermediate_size=config.moe_intermediate_size,
+                                reduce_results=False,
+                                renormalize=config.norm_expert_weight,
+                                quant_config=quant_config,
+                                prefix=f"{prefix}.experts")
+        self.gate = ReplicatedLinear(config.hidden_size,
+                                     config.moe_num_experts,
+                                     bias=False,
+                                     quant_config=None,
+                                     prefix=f"{prefix}.gate")
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        orig_shape = hidden_states.shape
+        hidden_dim = hidden_states.shape[-1]
+        hidden_states = hidden_states.view(-1, hidden_dim)
+        # Router logits come from the unquantized (quant_config=None) gate.
+        router_logits, _ = self.gate(hidden_states)
+
+        final_hidden_states = self.experts(hidden_states=hidden_states,
+                                           router_logits=router_logits)
+        if self.tp_size > 1:
+            final_hidden_states = tensor_model_parallel_all_reduce(
+                final_hidden_states)
+
+        return final_hidden_states.view(orig_shape)
+
+
+class Step3TextMLP(nn.Module):
+    """Gated MLP: fused gate/up projection, SiluAndMul, then down_proj."""
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        hidden_act: str,
+        quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.gate_up_proj = MergedColumnParallelLinear(
+            hidden_size, [intermediate_size] * 2,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj")
+        self.down_proj = RowParallelLinear(intermediate_size,
+                                           hidden_size,
+                                           bias=False,
+                                           quant_config=quant_config,
+                                           prefix=f"{prefix}.down_proj")
+        if hidden_act != "silu":
+            raise ValueError(f"Unsupported activation: {hidden_act}. "
+                             "Only silu is supported for now.")
+        self.act_fn = SiluAndMul()
+        self.hidden_size = hidden_size
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        gate_up, _ = self.gate_up_proj(hidden_states)
+        intermediate_act = self.act_fn(gate_up)
+        output, _ = self.down_proj(intermediate_act)
+        return output
+
+
+class Step3TextAttention(nn.Module):
+    """Single-KV-head attention; Q is qkv_proj -> RMSNorm -> wq expansion."""
+    def __init__(
+        self,
+        hidden_size: int,
+        num_heads: int,
+        num_kv_heads: int,
+        norm_eps: float,
+        rope_theta: int,
+        share_q_dim: Optional[int] = None,
+        rope_scaling: Optional[dict[str, Any]] = None,
+        max_position_embedding: int = 8192,
+        head_dim: int = 256,
+        cache_config: Optional[CacheConfig] = None,
+        quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.hidden_size = hidden_size
+        tp_size = get_tensor_model_parallel_world_size()
+
+        self.total_num_heads = num_heads
+        assert self.total_num_heads % tp_size == 0
+        self.num_heads = self.total_num_heads // tp_size
+
+        if num_kv_heads != 1:
+            raise ValueError(f"Step3TextAttention num_kv_heads must be 1, "
+                             f"but got {num_kv_heads}.")
+        self.num_kv_heads = num_kv_heads
+
+        self.head_dim = head_dim
+        self.kv_size = self.num_kv_heads * self.head_dim
+        self.q_size = share_q_dim if share_q_dim else self.head_dim
+        # Output layout: [q (q_size), k (kv_size), v (kv_size)]; see forward().
+        self.qkv_proj = ReplicatedLinear(
+            hidden_size,
+            self.q_size + self.kv_size * 2,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+
+        self.o_proj = RowParallelLinear(
+            self.total_num_heads * self.head_dim,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+        self.inter_norm = RMSNorm(self.q_size, eps=norm_eps)
+        self.wq = ColumnParallelLinear(
+            self.q_size,
+            self.head_dim * self.total_num_heads,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.wq",
+        )
+        self.rotary_emb = get_rope(self.head_dim,
+                                   rotary_dim=self.head_dim,
+                                   max_position=max_position_embedding,
+                                   base=rope_theta,
+                                   rope_scaling=rope_scaling)
+        scaling = self.head_dim**-0.5
+        self.attn = Attention(self.num_heads,
+                              self.head_dim,
+                              scaling,
+                              self.num_kv_heads,
+                              cache_config=cache_config,
+                              prefix=f"{prefix}.attn")
+
+    def forward(self, positions: torch.Tensor,
+                hidden_states: torch.Tensor) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+        q = self.inter_norm(q)
+        q = self.wq(q)[0]
+        q, k = self.rotary_emb(positions, q, k)
+        attn_output = self.attn(q, k, v)
+        residual, _ = self.o_proj(attn_output)
+        return residual
+
+
+class Step3TextDecoderLayer(nn.Module):
+    """Decoder layer: attention, then a dense MLP or MoE + shared expert."""
+    def __init__(self,
+                 config: ModelConfig,
+                 cache_config: Optional[CacheConfig] = None,
+                 quant_config: Optional[QuantizationConfig] = None,
+                 prefix: str = "") -> None:
+        super().__init__()
+        config = config.hf_config
+        self.hidden_size = config.hidden_size
+        rope_scaling = getattr(config, "rope_scaling", None)
+
+        self.self_attn = Step3TextAttention(
+            hidden_size=self.hidden_size,
+            num_heads=config.num_attention_heads,
+            num_kv_heads=1,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            norm_eps=config.rms_norm_eps,
+            max_position_embedding=config.max_position_embedding,
+            head_dim=config.head_dim,
+            share_q_dim=config.share_q_dim,
+            rope_theta=config.rope_theta,
+            rope_scaling=rope_scaling,
+            prefix=f"{prefix}.self_attn")
+        # The layer index is parsed from `prefix` (expects "layers.<idx>").
+        layer_idx = int(prefix.split("layers.")[1].split(".")[0])
+        moe_layers_enum = getattr(config, "moe_layers_enum", None)
+        if moe_layers_enum is not None:
+            moe_layers_idx = [
+                int(i) for i in moe_layers_enum.strip().split(',')
+            ]
+        else:
+            # Default: layer 0 is dense, every later layer is MoE.
+            moe_layers_idx = [i for i in range(1, config.num_hidden_layers)]
+
+        if layer_idx in moe_layers_idx:
+            self.moe = FusedMoEBlock(config=config,
+                                     quant_config=quant_config,
+                                     prefix=f"{prefix}.moe")
+            self.share_expert = Step3TextMLP(
+                hidden_size=self.hidden_size,
+                intermediate_size=config.share_expert_dim,
+                hidden_act="silu",
+                quant_config=quant_config,
+                prefix=f"{prefix}.share_expert")
+            self.use_moe = True
+        else:
+            self.mlp = Step3TextMLP(hidden_size=config.hidden_size,
+                                    intermediate_size=config.intermediate_size,
+                                    hidden_act="silu",
+                                    quant_config=quant_config,
+                                    prefix=f"{prefix}.mlp")
+            self.use_moe = False
+        self.input_layernorm = RMSNorm(config.hidden_size,
+                                       eps=config.rms_norm_eps)
+        self.post_attention_layernorm = RMSNorm(config.hidden_size,
+                                                eps=config.rms_norm_eps)
+
+    def forward(
+            self, positions: torch.Tensor, hidden_states: torch.Tensor,
+            residual: Optional[torch.Tensor]
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        if residual is None:
+            residual = hidden_states
+            hidden_states = self.input_layernorm(hidden_states)
+        else:
+            hidden_states, residual = self.input_layernorm(
+                hidden_states, residual)
+
+        hidden_states = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+        )
+
+        hidden_states, residual = self.post_attention_layernorm(
+            hidden_states, residual)
+        # MoE layers add the shared-expert MLP output to the routed output.
+        if self.use_moe:
+            share_output = self.share_expert(hidden_states)
+            moe_output = self.moe(hidden_states)
+            hidden_states = share_output + moe_output
+        else:
+            hidden_states = self.mlp(hidden_states)
+
+        return hidden_states, residual
+
+
+@support_torch_compile
+class Step3TextModel(nn.Module):
+    """Step3 decoder stack with pipeline-parallel-aware embed/norm layers."""
+    def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None:
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+        self.vocab_size = config.vocab_size
+        self.config = config
+
+        if get_pp_group().is_first_rank or (config.tie_word_embeddings
+                                            and get_pp_group().is_last_rank):
+            self.embed_tokens = VocabParallelEmbedding(
+                self.vocab_size,
+                config.hidden_size,
+            )
+        else:
+            self.embed_tokens = PPMissingLayer()
+
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: Step3TextDecoderLayer(config=vllm_config.
+                                                 model_config,
+                                                 cache_config=cache_config,
+                                                 quant_config=quant_config,
+                                                 prefix=prefix),
+            prefix=f"{prefix}.layers",
+        )
+        if get_pp_group().is_last_rank:
+            self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        else:
+            self.norm = PPMissingLayer()
+        # Non-first PP ranks read both keys in forward(), so allocate both.
+        self.make_empty_intermediate_tensors = (
+            make_empty_intermediate_tensors_factory(
+                ["hidden_states", "residual"], config.hidden_size))
+
+    def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: Optional[IntermediateTensors] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.get_input_embeddings(input_ids)
+            residual = None
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            residual = intermediate_tensors["residual"]
+
+        for i in range(self.start_layer, self.end_layer):
+            layer = self.layers[i]
+            hidden_states, residual = layer(positions, hidden_states, residual)
+        # Intermediate PP ranks hand off both tensors instead of normalizing.
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors({
+                "hidden_states": hidden_states,
+                "residual": residual,
+            })
+
+        hidden_states, _ = self.norm(hidden_states, residual)
+        return hidden_states
+
+
+class Step3TextForCausalLM(nn.Module, SupportsPP):
+    """Step3TextModel plus LM head/sampler on the last PP rank."""
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        lora_config = vllm_config.lora_config
+        self.config = config
+        self.vllm_config = vllm_config
+
+        self.model = Step3TextModel(vllm_config=vllm_config, prefix=prefix)
+        # logits_processor and sampler are only created on the last PP rank.
+        if get_pp_group().is_last_rank:
+            self.unpadded_vocab_size = config.vocab_size
+            if lora_config:
+                self.unpadded_vocab_size += lora_config.lora_extra_vocab_size
+            self.lm_head = ParallelLMHead(
+                self.unpadded_vocab_size,
+                config.hidden_size,
+                org_num_embeddings=config.vocab_size,
+                padding_size=DEFAULT_VOCAB_PADDING_SIZE
+                if not lora_config else lora_config.lora_vocab_padding_size,
+            )
+            self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,
+                                                    config.vocab_size)
+            self.sampler = get_sampler()
+        else:
+            self.lm_head = PPMissingLayer()
+
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors)
+
+    def forward(self,
+                input_ids: torch.Tensor,
+                positions: torch.Tensor,
+                intermediate_tensors: Optional[IntermediateTensors] = None,
+                inputs_embeds: Optional[torch.Tensor] = None):
+        hidden_states = self.model(input_ids, positions, intermediate_tensors,
+                                   inputs_embeds)
+        return hidden_states
+
+    def compute_logits(self, hidden_states: torch.Tensor,
+                       sampling_metadata: SamplingMetadata) -> torch.Tensor:
+        logits = self.logits_processor(self.lm_head, hidden_states,
+                                       sampling_metadata)
+        return logits
+
+    def sample(
+        self,
+        logits: Optional[torch.Tensor],
+        sampling_metadata: SamplingMetadata,
+    ) -> Optional[SamplerOutput]:
+        next_tokens = self.sampler(logits, sampling_metadata)
+        return next_tokens
+
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
+        qkv_params_mapping = [
+            # (param_name, shard_name, relative_start_idx, relative_end_idx)
+            (".qkv_proj", ".q_proj", 0, self.config.share_q_dim /
+             (self.config.share_q_dim + self.config.head_dim * 2)),
+            (".qkv_proj", ".k_proj", self.config.share_q_dim /
+             (self.config.share_q_dim + self.config.head_dim * 2),
+             (self.config.share_q_dim + self.config.head_dim) /
+             (self.config.share_q_dim + self.config.head_dim * 2)),
+            (".qkv_proj", ".v_proj",
+             (self.config.share_q_dim + self.config.head_dim) /
+             (self.config.share_q_dim + self.config.head_dim * 2),
+             (self.config.share_q_dim + self.config.head_dim * 2) /
+             (self.config.share_q_dim + self.config.head_dim * 2)),
+        ]
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            (".gate_up_proj", ".gate_proj", 0),
+            (".gate_up_proj", ".up_proj", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        # (param_name, checkpoint_weight_name, shard_id) for the MoE experts.
+        expert_params_mapping = [
+            (".moe.experts.w13_weight", ".moe.gate_proj.weight", "w1"),
+            (".moe.experts.w13_weight", ".moe.up_proj.weight", "w3"),
+            (".moe.experts.w2_weight", ".moe.down_proj.weight", "w2")
+        ]
+        # MoE checkpoint names that must skip the dense gate/up stacking.
+        disable_moe_stacked_params = [
+            data[1] for data in expert_params_mapping
+        ]
+        # Mappings are tried in order: dense gate/up, experts, qkv, default.
+        for name, loaded_weight in weights:
+            for (param_name, weight_name, shard_id) in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                if any(disable_moe_stacked_param in name
+                       for disable_moe_stacked_param in
+                       disable_moe_stacked_params):
+                    continue
+                name = name.replace(weight_name, param_name)
+                if is_pp_missing_parameter(name, self):
+                    continue
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name)
+                break
+            else:
+                for mapping in expert_params_mapping:
+                    param_name, weight_name, shard_id = mapping
+                    if weight_name not in name:
+                        continue
+                    name = name.replace(weight_name, param_name)
+                    # Skip layers on other devices.
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    # Skip loading extra bias for GPTQ models.
+                    if ((name.endswith(".bias") or name.endswith("_bias"))
+                            and name not in params_dict):
+                        continue
+                    param = params_dict[name]
+                    weight_loader = param.weight_loader
+                    for expert_id in range(loaded_weight.shape[0]):
+                        loaded_weight_expert = loaded_weight[expert_id]
+                        weight_loader(param,
+                                      loaded_weight_expert,
+                                      name,
+                                      shard_id=shard_id,
+                                      expert_id=expert_id)
+                    loaded_params.add(name)
+                    break
+                else:
+                    for (param_name, weight_name, start_idx,
+                         end_idx) in qkv_params_mapping:
+                        if weight_name not in name:
+                            continue
+                        name = name.replace(weight_name, param_name)
+                        if is_pp_missing_parameter(name, self):
+                            continue
+                        param = params_dict[name]
+                        dim = param.shape[param.output_dim]
+                        begin_idx = int(start_idx * dim)
+                        end_idx = int(end_idx * dim)
+                        param_slice = param.narrow(param.output_dim, begin_idx,
+                                                   end_idx - begin_idx)
+                        param_slice.copy_(loaded_weight)
+                        loaded_params.add(name)
+                        break
+                    else:
+                        if is_pp_missing_parameter(name, self):
+                            continue
+                        param = params_dict[name]
+                        weight_loader = getattr(param, "weight_loader",
+                                                default_weight_loader)
+                        weight_loader(param, loaded_weight)
+                        loaded_params.add(name)
+        return loaded_params
diff --git a/vllm/model_executor/models/step3_vl.py b/vllm/model_executor/models/step3_vl.py
new file mode 100644
index 0000000000000..363c12a4bf2b8
--- /dev/null
+++ b/vllm/model_executor/models/step3_vl.py
@@ -0,0 +1,1052 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import math
+from collections.abc import Iterable, Mapping, Sequence
+from functools import cached_property
+from itertools import product
+from math import ceil, sqrt
+from typing import Any, Literal, Optional, TypedDict, Union
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from PIL import Image
+from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
+from transformers import BatchFeature, PretrainedConfig, TensorType
+
+from vllm.config import VllmConfig
+from vllm.distributed import get_tensor_model_parallel_world_size
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.linear import (ColumnParallelLinear,
+                                               QKVParallelLinear,
+                                               RowParallelLinear)
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
+from vllm.model_executor.sampling_metadata import SamplingMetadata
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
+                                    MultiModalKwargs, NestedTensors)
+from vllm.multimodal.parse import ImageSize, MultiModalDataItems
+from vllm.multimodal.processing import (BaseMultiModalProcessor,
+                                        BaseProcessingInfo, PromptReplacement,
+                                        PromptUpdate, PromptUpdateDetails)
+from vllm.multimodal.profiling import BaseDummyInputsBuilder
+from vllm.sequence import IntermediateTensors
+from vllm.transformers_utils.configs import Step3VisionEncoderConfig
+from vllm.transformers_utils.tokenizer import AnyTokenizer
+
+from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
+from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
+                    init_vllm_registered_model, maybe_prefix,
+                    merge_multimodal_embeddings)
+
+
+class Step3VLImagePixelInputs(TypedDict):
+    type: Literal["pixel_values"]  # discriminator for Step3VLImageInputs
+    pixel_values: torch.Tensor
+    patch_pixel_values: Optional[torch.Tensor]  # None is a valid value
+    num_patches: list[int]  # presumably one count per image -- TODO confirm
+
+
+class Step3VLImageEmbeddingInputs(TypedDict):
+    type: Literal["image_embeds"]  # discriminator for Step3VLImageInputs
+    image_embeds: torch.Tensor
+
+
+Step3VLImageInputs = Union[Step3VLImagePixelInputs,
+                           Step3VLImageEmbeddingInputs]
+
+ImageWithPatches = tuple[Image.Image, list[Image.Image], Optional[list[int]]]
+
+MAX_IMAGE_SIZE: int = 3024
+
+
+class Step3VisionProcessor:
+    """Tensor-convert, normalize, then resize images (and patches)."""
+    def __init__(self, size, interpolation_mode="bicubic", patch_size=None):
+        mean = [0.48145466, 0.4578275, 0.40821073]
+        std = [0.26862954, 0.26130258, 0.27577711]
+        patch_size = patch_size if patch_size is not None else size
+
+        self.transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean, std),
+            transforms.Resize(
+                (size, size),
+                interpolation=InterpolationMode.BICUBIC if interpolation_mode
+                == "bicubic" else InterpolationMode.BILINEAR,
+                antialias=True),
+        ])
+        # patch_size was defaulted to `size` above, so the else-None is dead.
+        self.patch_transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean, std),
+            transforms.Resize(
+                (patch_size, patch_size),
+                interpolation=InterpolationMode.BICUBIC if interpolation_mode
+                == "bicubic" else InterpolationMode.BILINEAR,
+                antialias=True),
+        ]) if patch_size is not None else None
+
+    def __call__(self, image, is_patch=False):
+        if is_patch:
+            return {"pixel_values": self.patch_transform(image).unsqueeze(0)}
+        else:
+            return {"pixel_values": self.transform(image).unsqueeze(0)}
+
+
+class ImagePatcher:
+    """Resizes an image and optionally tiles it into square window crops."""
+    def determine_window_size(self, long: int, short: int) -> int:
+        if long <= 728:
+            return short if long / short > 1.5 else 0
+        return min(short, 504) if long / short > 4 else 504
+
+    def slide_window(
+        self,
+        width: int,
+        height: int,
+        sizes: list[tuple[int, int]],
+        steps: list[tuple[int, int]],
+        img_rate_thr: float = 0.6,
+    ) -> tuple[list[tuple[int, int, int, int]], tuple[int, int]]:
+        assert 1 >= img_rate_thr >= 0, "The `img_rate_thr` should lie in 0~1"
+        windows = []
+        # Sliding windows.
+        for size, step in zip(sizes, steps):
+            size_w, size_h = size
+            step_w, step_h = step
+
+            x_num = 1 if width <= size_w else ceil((width - size_w) / step_w +
+                                                   1)
+            x_start = [step_w * i for i in range(x_num)]
+            if len(x_start) > 1 and x_start[-1] + size_w > width:
+                x_start[-1] = width - size_w
+
+            y_num = 1 if height <= size_h else ceil((height - size_h) /
+                                                    step_h + 1)
+            y_start = [step_h * i for i in range(y_num)]
+            if len(y_start) > 1 and y_start[-1] + size_h > height:
+                y_start[-1] = height - size_h
+            # product() yields (y, x); swap columns so each row is (x, y).
+            start = np.array(list(product(y_start, x_start)), dtype=int)
+            start[:, [0, 1]] = start[:, [1, 0]]
+            windows.append(np.concatenate([start, start + size], axis=1))
+        windows = np.concatenate(windows, axis=0)
+        # Emit (x, y, w, h) boxes; (x_num, y_num) comes from the last size.
+        return [(int(box[0]), int(box[1]), int(box[2] - box[0]),
+                 int(box[3] - box[1])) for box in windows], (x_num, y_num)
+
+    def square_pad(self, img: Image.Image) -> Image.Image:
+        w, h = img.size
+        if w == h:
+            return img
+        size = max(w, h)
+        padded = Image.new(img.mode, (size, size), 0)
+        padded.paste(img, (0, 0))
+        return padded
+
+    def get_image_size_for_padding(self, img_width: int,
+                                   img_height: int) -> tuple[int, int]:
+        ratio = img_width / img_height
+        if min(img_height, img_width) < 32 and (ratio > 4 or ratio < 1 / 4):
+            new_size = max(img_height, img_width)
+            return new_size, new_size
+        return img_width, img_height
+
+    def get_image_size_for_preprocess(self, img_width: int,
+                                      img_height: int) -> tuple[int, int]:
+        """Downscale so the longer side is at most MAX_IMAGE_SIZE."""
+        if max(img_height, img_width) > MAX_IMAGE_SIZE:
+            scale_factor = MAX_IMAGE_SIZE / max(img_height, img_width)
+            img_width = int(img_width * scale_factor)
+            img_height = int(img_height * scale_factor)
+        return img_width, img_height
+
+    def get_image_size_for_crop(self, img_width: int, img_height: int,
+                                window_size: int):
+        w_ratio = img_width / window_size
+        h_ratio = img_height / window_size
+        # Round each side to a window multiple; round up if remainder > 0.2.
+        if w_ratio < 1:
+            width_new = img_width
+        else:
+            decimal_w = w_ratio - img_width // window_size
+            w_ratio = int(w_ratio) + 1 if decimal_w > 0.2 else int(w_ratio)
+            width_new = window_size * w_ratio
+        if h_ratio < 1:
+            height_new = img_height
+        else:
+            decimal_h = h_ratio - img_height // window_size
+            h_ratio = int(h_ratio) + 1 if decimal_h > 0.2 else int(h_ratio)
+            height_new = window_size * h_ratio
+        return int(width_new), int(height_new)
+
+    def patch_crop(self, img: Image.Image, i: int, j: int, th: int, tw: int):
+        target = img.crop((j, i, j + tw, i + th))
+        return target
+
+    def get_num_patches(self, img_width: int,
+                        img_height: int) -> tuple[int, int]:
+        img_width, img_height = self.get_image_size_for_padding(
+            img_width, img_height)
+        img_width, img_height = self.get_image_size_for_preprocess(
+            img_width, img_height)
+        window_size = self.determine_window_size(max(img_height, img_width),
+                                                 min(img_height, img_width))
+        if window_size == 0:
+            return 0, 0
+        else:
+            img_width, img_height = self.get_image_size_for_crop(
+                img_width, img_height, window_size)
+            center_list, (x_num, y_num) = self.slide_window(
+                img_width, img_height, [(window_size, window_size)],
+                [(window_size, window_size)])
+            full_rows = (len(center_list) - 1) // x_num + 1
+            if len(center_list) > 0 and len(center_list) % x_num == 0:
+                full_rows -= 1
+            return len(center_list), full_rows
+
+    def __call__(
+        self, img: Image.Image
+    ) -> tuple[Image.Image, list[Image.Image], Optional[list[bool]]]:
+        img_width, img_height = img.size
+        new_img_width, new_img_height = self.get_image_size_for_padding(
+            img_width, img_height)
+        if new_img_width != img_width or new_img_height != img_height:
+            img = self.square_pad(img)
+            img_width, img_height = img.size
+
+        new_img_width, new_img_height = self.get_image_size_for_preprocess(
+            img_width, img_height)
+        img = img.resize((new_img_width, new_img_height),
+                         Image.Resampling.BILINEAR)
+        window_size = self.determine_window_size(
+            max(new_img_height, new_img_width),
+            min(new_img_height, new_img_width))
+        # window_size == 0 means: use the whole image only, no patches.
+        if window_size == 0:
+            return img, [], None
+        else:
+            new_img_width, new_img_height = self.get_image_size_for_crop(
+                new_img_width, new_img_height, window_size)
+            if (new_img_width, new_img_height) != (img_width, img_height):
+                img_for_crop = img.resize((new_img_width, new_img_height),
+                                          Image.Resampling.BILINEAR)
+            else:
+                img_for_crop = img
+
+            patches = []
+            newlines = []
+            center_list, (x_num, y_num) = self.slide_window(
+                new_img_width, new_img_height, [(window_size, window_size)],
+                [(window_size, window_size)])
+            for patch_id, center_lf_point in enumerate(center_list):
+                x, y, patch_w, patch_h = center_lf_point
+                big_patch = self.patch_crop(img_for_crop, y, x, patch_h,
+                                            patch_w)
+                patches.append(big_patch)
+                if (patch_id + 1) % x_num == 0:
+                    newlines.append(patch_id)
+            # No newline marker is kept after the final patch.
+            if newlines and newlines[-1] == len(patches) - 1:
+                newlines.pop()
+            # Per-patch flags: True where a row ends (never on the last patch).
+            return img, patches, [i in newlines for i in range(len(patches))
+                                  ] if len(patches) > 0 else None
+
+
+class Step3VLProcessor:
+    """Expands image placeholders in text and converts images to tensors."""
+
+    def __init__(
+        self,
+        config: PretrainedConfig,
+        tokenizer: AnyTokenizer,
+    ) -> None:
+        super().__init__()
+
+        self.config = config
+        self.tokenizer = tokenizer
+
+        self.image_size = 728
+        self.patch_size = 504
+        self.image_preprocessor = Step3VisionProcessor(self.image_size,
+                                                       "bilinear",
+                                                       self.patch_size)
+
+        self.num_image_feature_size = 169  # <im_patch> tokens per full image
+        self.num_patch_feature_size = 81  # <im_patch> tokens per patch
+        self.image_token = "<im_patch>"
+        self.image_feature_placeholder = (self.image_token *
+                                          self.num_image_feature_size)
+        self.patch_feature_placeholder = (self.image_token *
+                                          self.num_patch_feature_size)
+
+        self.patcher = ImagePatcher()
+
+    @property
+    def image_token_id(self) -> int:
+        return self.tokenizer.get_vocab()[self.image_token]
+
+    def get_num_image_tokens(self, img_width: int, img_height: int) -> int:
+        num_patches, num_newlines = self.patcher.get_num_patches(
+            img_width, img_height)
+
+        return num_patches * (
+            self.num_patch_feature_size +
+            2) + self.num_image_feature_size + 2 + num_newlines
+
+    def _split_images(self,
+                      images: list[Image.Image]) -> list[ImageWithPatches]:
+        # One (resized image, patches, newline mask) tuple per input image.
+        return [self.patcher(img) for img in images]
+
+    def _convert_images_to_pixel_values(
+        self,
+        images: list[Image.Image],
+        is_patch: bool = False,
+    ) -> list[torch.Tensor]:
+        return [
+            self.image_preprocessor(img, is_patch=is_patch)["pixel_values"]
+            for img in images
+        ]
+
+    def _get_patch_repl(
+        self,
+        num_patches: int,
+        patch_newline_mask: list[bool] | None,
+    ) -> tuple[str, list[int]]:
+        # A None mask means no row breaks; a given mask is checked only once.
+        if patch_newline_mask is not None and num_patches > 0:
+            assert len(patch_newline_mask) == num_patches
+        to_id = self.tokenizer.convert_tokens_to_ids
+        # Built once: image_token_id does a vocab lookup on every access.
+        patch_ids = ([to_id("<patch_start>")] + [self.image_token_id] *
+                     self.num_patch_feature_size + [to_id("<patch_end>")])
+        text, token_ids = "", []
+        for i in range(num_patches):
+            text += f"<patch_start>{self.patch_feature_placeholder}<patch_end>"
+            token_ids.extend(patch_ids)
+            if patch_newline_mask and patch_newline_mask[i]:
+                text += "<patch_newline>"
+                token_ids.append(to_id("<patch_newline>"))
+        return text, token_ids
+
+    def _get_image_repl(
+        self,
+        num_images: int,
+    ) -> tuple[str, list[int]]:
+        text = f"<im_start>{self.image_feature_placeholder}<im_end>"
+        token_ids = [
+            self.tokenizer.convert_tokens_to_ids("<im_start>")
+        ] + [self.image_token_id] * self.num_image_feature_size + [
+            self.tokenizer.convert_tokens_to_ids("<im_end>")
+        ]
+        return text * num_images, token_ids * num_images
+
+    def _get_image_repl_features(
+        self,
+        num_images: int,
+        num_patches: int,
+        patch_new_line_idx: Optional[list[bool]],
+    ) -> tuple[str, list[int]]:
+        if num_patches > 0:
+            patch_repl, patch_repl_ids = self._get_patch_repl(
+                num_patches, patch_new_line_idx)
+        else:
+            patch_repl = ""
+            patch_repl_ids = []
+        image_repl, image_repl_ids = self._get_image_repl(num_images)
+        return patch_repl + image_repl, patch_repl_ids + image_repl_ids
+
+    def replace_placeholder(self, text: str, placeholder: str,
+                            repls: list[str]) -> str:
+        parts = text.split(placeholder)
+
+        if len(parts) - 1 != len(repls):
+            raise ValueError(
+                "The number of placeholders does not match the number of replacements."  # noqa: E501
+            )
+
+        result = [parts[0]]
+        for i, repl in enumerate(repls):
+            result.append(repl)
+            result.append(parts[i + 1])
+
+        return "".join(result)
+
+    def __call__(
+        self,
+        text: Optional[Union[str, list[str]]] = None,
+        images: Optional[Union[Image.Image, list[Image.Image]]] = None,
+        return_tensors: Optional[Union[str, TensorType]] = None,
+    ) -> BatchFeature:
+        if text is None:
+            text = []
+        if not isinstance(text, list):
+            text = [text]
+        if images is None:
+            images = []
+        if not isinstance(images, list):
+            images = [images]
+
+        if len(images) == 0:
+            image_inputs = {}
+            text_inputs = self.tokenizer(text)
+        else:
+            splitted_images_data = self._split_images(images)
+            pixel_values_lst = []
+            patch_pixel_values_lst = []
+            patch_newline_mask_lst = []
+            image_repl_str_lst = []
+            image_repl_ids_lst = []
+            num_patches = []
+            for raw_img, img_patches, patch_newline_mask in splitted_images_data:  # noqa: E501
+                pixel_values_lst.extend(
+                    self._convert_images_to_pixel_values([raw_img]))
+
+                if len(img_patches) > 0:
+                    patch_pixel_values_lst.extend(
+                        self._convert_images_to_pixel_values(img_patches,
+                                                             is_patch=True))
+                num_patches.append(len(img_patches))
+
+                image_repl_str, image_repl_ids = self._get_image_repl_features(
+                    1, len(img_patches), patch_newline_mask)
+                image_repl_str_lst.append(image_repl_str)
+                image_repl_ids_lst.extend(image_repl_ids)
+
+                if patch_newline_mask is not None:
+                    patch_newline_mask_lst.extend(patch_newline_mask)
+
+            image_inputs = {
+                "pixel_values": torch.cat(pixel_values_lst),
+                "num_patches": num_patches,
+            }
+            if patch_pixel_values_lst:
+                image_inputs["patch_pixel_values"] = torch.cat(
+                    patch_pixel_values_lst)
+            if patch_newline_mask_lst:
+                image_inputs["patch_newline_mask"] = torch.tensor(
+                    patch_newline_mask_lst, dtype=torch.bool)
+
+            text = [
+                self.replace_placeholder(t, self.image_token,
+                                         image_repl_str_lst) for t in text
+            ]
+            text_inputs = self.tokenizer(text)
+
+        return BatchFeature(
+            {
+                **text_inputs,
+                **image_inputs,
+            },
+            tensor_type=return_tensors,
+        )
+
+
+class Step3VLProcessingInfo(BaseProcessingInfo):
+    """Processing metadata for Step3-VL: processor factory, token counts."""
+
+    def get_hf_processor(self, **kwargs: object) -> Step3VLProcessor:
+        # Callers forward processor kwargs here; Step3VLProcessor takes no
+        # options beyond config and tokenizer, so the kwargs are unused.
+        return Step3VLProcessor(self.get_hf_config(), self.get_tokenizer())
+
+    def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
+        return {"image": None}
+
+    def get_max_image_tokens(self) -> int:
+        # Token count for the size reported as yielding the most features.
+        largest = self.get_image_size_with_most_features()
+        return self.get_hf_processor().get_num_image_tokens(
+            largest.width, largest.height)
+
+    def get_mm_max_tokens_per_item(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> Mapping[str, int]:
+        return {"image": self.get_max_image_tokens()}
+
+    def get_image_size_with_most_features(self) -> ImageSize:
+        return ImageSize(3024, 3024)
+
+    def get_num_mm_tokens(self, mm_data: MultiModalDataDict) -> int:
+        if len(mm_data) != 1 or "image" not in mm_data:
+            raise ValueError(
+                "mm_data could only contain one key 'image' for step1o")
+
+        image_data = mm_data["image"]
+        if not isinstance(image_data, (list, tuple)):
+            image_data = [image_data]
+        hf_processor = self.get_hf_processor()  # build once, not per image
+        return sum(hf_processor.get_num_image_tokens(img.width, img.height)
+                   for img in image_data)
+
+
+class Step3VLDummyInputsBuilder(BaseDummyInputsBuilder[Step3VLProcessingInfo]):
+    """Produces dummy prompt text and dummy images for the image modality."""
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        num_images = mm_counts.get("image", 0)
+        return "<im_patch>" * num_images  # one placeholder per image
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> MultiModalDataDict:
+        target_width, target_height = \
+            self.info.get_image_size_with_most_features()
+        num_images = mm_counts.get("image", 0)
+
+        return {
+            "image":
+            self._get_dummy_images(width=target_width,
+                                   height=target_height,
+                                   num_images=num_images)
+        }
+
+
+class Step3VLMultiModalProcessor(BaseMultiModalProcessor[Step3VLProcessingInfo]
+                                 ):
+    """Expands each image placeholder token and declares the tensor layout."""
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, Any],
+        out_mm_kwargs: MultiModalKwargs,
+    ) -> Sequence[PromptUpdate]:
+        hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
+        image_placeholder_token_id = hf_processor.image_token_id
+        batch_num_patches = out_mm_kwargs["num_patches"].tolist()
+        # batch_num_patches is indexed by image item below.
+        def get_replacement_step1o(item_idx: int):
+            img_out = out_mm_kwargs.get_item("image", item_idx)
+            num_patches = batch_num_patches[item_idx]
+            if num_patches > 0:
+                patch_newline_mask = img_out["patch_newline_mask"].data.tolist(
+                )
+                image_repl_ids = hf_processor._get_image_repl_features(
+                    1, num_patches, patch_newline_mask)[1]
+            else:
+                image_repl_ids = hf_processor._get_image_repl_features(
+                    1, 0, None)[1]
+            return PromptUpdateDetails.select_token_id(
+                seq=image_repl_ids,
+                embed_token_id=image_placeholder_token_id,
+            )
+
+        return [
+            PromptReplacement(
+                modality="image",
+                target=[image_placeholder_token_id],
+                replacement=get_replacement_step1o,
+            )
+        ]
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        num_patches = hf_inputs.get("num_patches", torch.empty(0))
+        # Patch fields are flat across images; num_patches gives each size.
+        return dict(
+            pixel_values=MultiModalFieldConfig.batched("image"),
+            patch_pixel_values=MultiModalFieldConfig.flat_from_sizes(
+                "image", num_patches),
+            num_patches=MultiModalFieldConfig.batched("image"),
+            patch_newline_mask=MultiModalFieldConfig.flat_from_sizes(
+                "image", num_patches),
+        )
+
+
+def get_abs_pos(abs_pos, tgt_size):
+    dim = abs_pos.size(-1)
+    abs_pos_new = abs_pos.squeeze(0)
+    cls_token, old_pos_embed = abs_pos_new[:1], abs_pos_new[1:]
+    # tgt_size arrives as a token count; both sizes become grid side lengths.
+    src_size = int(math.sqrt(abs_pos_new.shape[0] - 1))
+    tgt_size = int(math.sqrt(tgt_size))
+    dtype = abs_pos.dtype
+    # Resize only on a grid mismatch; the interpolation runs in float32.
+    if src_size != tgt_size:
+        old_pos_embed = old_pos_embed.view(1, src_size, src_size,
+                                           dim).permute(0, 3, 1,
+                                                        2).contiguous()
+        old_pos_embed = old_pos_embed.to(torch.float32)
+        new_pos_embed = F.interpolate(
+            old_pos_embed,
+            size=(tgt_size, tgt_size),
+            mode='bicubic',
+            antialias=True,
+            align_corners=False,
+        ).to(dtype)
+        new_pos_embed = new_pos_embed.permute(0, 2, 3, 1)
+        new_pos_embed = new_pos_embed.view(tgt_size * tgt_size, dim)
+        vision_pos_embed = torch.cat([cls_token, new_pos_embed], dim=0)
+        vision_pos_embed = vision_pos_embed.view(1, tgt_size * tgt_size + 1,
+                                                 dim)
+        return vision_pos_embed
+    else:
+        return abs_pos
+
+
+class Step3VisionEmbeddings(nn.Module):
+    """Patch and class-token embeddings with resized position embeddings."""
+    def __init__(self, config: Step3VisionEncoderConfig):
+        super().__init__()
+        self.config = config
+        self.embed_dim = config.hidden_size
+        self.image_size = config.image_size
+        self.patch_size = config.patch_size
+
+        self.class_embedding = nn.Parameter(torch.randn(1, self.embed_dim))
+
+        self.patch_embedding = nn.Conv2d(
+            in_channels=config.num_channels,
+            out_channels=self.embed_dim,
+            kernel_size=self.patch_size,
+            stride=self.patch_size,
+            bias=True,
+        )
+
+        self.num_patches = (self.image_size // self.patch_size)**2
+        self.pad_tp_size = 4  # hard code for padding
+        # To load the pretrained weights, we still use P+1 as the seqlen
+        self.position_embedding = torch.nn.Embedding(self.num_patches + 1,
+                                                     self.embed_dim)
+        self.register_buffer("position_ids",
+                             torch.arange(self.num_patches + 1).expand(
+                                 (1, -1)),
+                             persistent=False)
+
+    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
+        batch_size = pixel_values.shape[0]
+        patch_embeds = self.patch_embedding(
+            pixel_values)  # shape = [*, width, grid, grid]
+        patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
+        # Prepend the class token and add position embeddings, then repeat the
+        # leading token pad_tp_size - 1 more times at the front of the sequence.
+        class_embeds = self.class_embedding.expand(batch_size, 1, -1)
+        embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
+        embeddings = embeddings + get_abs_pos(
+            self.position_embedding(self.position_ids), patch_embeds.size(1))
+        embeddings = torch.cat([
+            embeddings[:, 0, :].unsqueeze(1).repeat(1, self.pad_tp_size - 1,
+                                                    1), embeddings
+        ],
+                               dim=1)
+        return embeddings
+
+
+class Step3VisionAttention(nn.Module):
+    """Multi-headed attention from 'Attention Is All You Need' paper"""
+
+    def __init__(self,
+                 config,
+                 quant_config: Optional[QuantizationConfig] = None,
+                 prefix: str = ""):
+        super().__init__()
+        self.config = config
+        self.embed_dim = config.hidden_size
+        self.total_num_heads = config.num_attention_heads
+        self.head_dim = self.embed_dim // self.total_num_heads
+
+        self.scale = self.head_dim**-0.5
+
+        tp_size = get_tensor_model_parallel_world_size()
+        assert self.total_num_heads % tp_size == 0
+        self.num_heads = self.total_num_heads // tp_size
+        self.qkv_proj = QKVParallelLinear(self.embed_dim,
+                                          self.head_dim,
+                                          self.total_num_heads,
+                                          bias=True,
+                                          quant_config=quant_config,
+                                          prefix=f"{prefix}.qkv_proj")
+        self.out_proj = RowParallelLinear(self.embed_dim,
+                                          self.embed_dim,
+                                          bias=True,
+                                          quant_config=quant_config,
+                                          prefix=f"{prefix}.out_proj")
+
+    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+        return tensor.view(bsz, seq_len, self.num_heads,
+                           self.head_dim).transpose(1, 2).contiguous()
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+    ):
+        """Input shape: Batch x Time x Channel"""
+        bsz, tgt_len, _ = hidden_states.size()
+
+        # Fused QKV projection, then split into q, k and v per local head.
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.chunk(chunks=3, dim=-1)
+        q = q.view(bsz, tgt_len, self.num_heads, self.head_dim)
+        k = k.view(bsz, tgt_len, self.num_heads, self.head_dim)
+        v = v.view(bsz, tgt_len, self.num_heads, self.head_dim)
+        q = q.transpose(1, 2)
+        k = k.transpose(1, 2)
+        v = v.transpose(1, 2)
+        attn_output = F.scaled_dot_product_attention(q,
+                                                     k,
+                                                     v,
+                                                     scale=self.scale,
+                                                     is_causal=False)
+        attn_output = attn_output.transpose(1, 2).reshape(
+            bsz, tgt_len, self.num_heads * self.head_dim)
+
+        attn_output, _ = self.out_proj(attn_output)
+
+        return attn_output
+
+
+class Step3VisionMLP(nn.Module):
+    """Two-layer feed-forward block: fc1, activation, fc2."""
+    def __init__(self,
+                 config,
+                 quant_config: Optional[QuantizationConfig] = None,
+                 prefix: str = ""):
+        super().__init__()
+        self.config = config
+        self.activation_fn = get_act_fn(config.hidden_act)
+        self.fc1 = ColumnParallelLinear(config.hidden_size,
+                                        config.intermediate_size,
+                                        bias=True,
+                                        quant_config=quant_config,
+                                        prefix=f"{prefix}.fc1")
+        self.fc2 = RowParallelLinear(config.intermediate_size,
+                                     config.hidden_size,
+                                     bias=True,
+                                     quant_config=quant_config,
+                                     prefix=f"{prefix}.fc2")
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        hidden_states, _ = self.fc1(hidden_states)
+        hidden_states = self.activation_fn(hidden_states)
+        hidden_states, _ = self.fc2(hidden_states)
+        return hidden_states
+
+
+class Step3VisionEncoderLayer(nn.Module):
+    """Encoder block: each sublayer output is layer-normed, then added back."""
+    def __init__(self,
+                 config: Step3VisionEncoderConfig,
+                 quant_config: Optional[QuantizationConfig] = None,
+                 prefix: str = ""):
+        super().__init__()
+        self.embed_dim = config.hidden_size
+        self.self_attn = Step3VisionAttention(config,
+                                              quant_config,
+                                              prefix=f"{prefix}.self_attn")
+        self.layer_norm1 = nn.LayerNorm(self.embed_dim,
+                                        eps=config.layer_norm_eps)
+        self.mlp = Step3VisionMLP(config, quant_config, prefix=f"{prefix}.mlp")
+        self.layer_norm2 = nn.LayerNorm(self.embed_dim,
+                                        eps=config.layer_norm_eps)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.FloatTensor:
+        hidden_states = hidden_states + self.layer_norm1(
+            self.self_attn(hidden_states))
+        hidden_states = hidden_states + self.layer_norm2(
+            self.mlp(hidden_states))
+        return hidden_states
+
+
+class Step3VisionEncoder(nn.Module):
+    """Applies config.num_hidden_layers encoder layers in sequence."""
+    def __init__(self,
+                 config: Step3VisionEncoderConfig,
+                 quant_config: Optional[QuantizationConfig] = None,
+                 prefix: str = ""):
+        super().__init__()
+        self.config = config
+        self.layers = nn.ModuleList([
+            Step3VisionEncoderLayer(config,
+                                    quant_config,
+                                    prefix=f"{prefix}.layers.{i}")
+            for i in range(config.num_hidden_layers)
+        ])
+
+    def forward(
+        self,
+        inputs_embeds,
+    ):
+        hidden_states = inputs_embeds
+        for encoder_layer in self.layers:
+            hidden_states = encoder_layer(hidden_states)
+        return hidden_states
+
+
+class Step3VisionTransformer(nn.Module):
+    """Embeds pixels and runs the encoder; returns the final hidden states."""
+    def __init__(self,
+                 config: Step3VisionEncoderConfig,
+                 quant_config: Optional[QuantizationConfig] = None,
+                 prefix: str = ""):
+        super().__init__()
+        self.config = config
+        self.image_size = config.image_size
+        self.embeddings = Step3VisionEmbeddings(config)
+        self.transformer = Step3VisionEncoder(config,
+                                              quant_config,
+                                              prefix=f"{prefix}.transformer")
+
+    def forward(
+        self,
+        pixel_values: torch.Tensor,
+    ):
+        hidden_states = self.embeddings(pixel_values)
+        hidden_states = self.transformer(inputs_embeds=hidden_states)
+        return hidden_states
+
+
+@MULTIMODAL_REGISTRY.register_processor(Step3VLMultiModalProcessor,
+                                        info=Step3VLProcessingInfo,
+                                        dummy_inputs=Step3VLDummyInputsBuilder)
+class Step3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
+                                      SupportsPP):
+    """Step3 vision tower and projector feeding a registered language model."""
+    hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={
+        "model.": "language_model.model.",
+        "lm_head.": "language_model.lm_head.",
+    })
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
+        if modality.startswith("image"):
+            return "<im_patch>"
+
+        raise ValueError("Only image modality is supported")
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        multimodal_config = vllm_config.model_config.multimodal_config
+
+        self.config = config
+        self.multimodal_config = multimodal_config
+        # The vision tower is built with quant_config=None (second argument).
+        self.vision_model = Step3VisionTransformer(config.vision_config,
+                                                   None,
+                                                   prefix=maybe_prefix(
+                                                       prefix, "vision_model"))
+        self.vit_downsampler = nn.Conv2d(
+            config.vision_config.hidden_size,
+            config.vision_config.output_hidden_size,
+            kernel_size=2,
+            stride=config.understand_projector_stride)
+        self.vit_downsampler2 = nn.Conv2d(
+            config.vision_config.output_hidden_size,
+            config.vision_config.output_hidden_size * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+        )
+        self.vit_large_projector = nn.Linear(
+            config.vision_config.output_hidden_size * 2,
+            config.hidden_size,
+            bias=config.projector_bias,
+        )
+        self.language_model = init_vllm_registered_model(
+            vllm_config=vllm_config,
+            hf_config=config.text_config,
+            prefix=maybe_prefix(prefix, "language_model"))
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors)
+
+    @cached_property
+    def sampler(self):
+        if hasattr(self.language_model, "sampler"):
+            return self.language_model.sampler
+
+        return get_sampler()
+
+    @property
+    def device(self):
+        return next(self.parameters()).device
+
+    @property
+    def dtype(self):
+        return next(self.parameters()).dtype
+
+    def _parse_and_validate_image_input(
+            self, **kwargs: object) -> Optional[Step3VLImageInputs]:
+        pixel_values = kwargs.pop("pixel_values", None)
+        patch_pixel_values = kwargs.pop("patch_pixel_values", None)
+        num_patches = kwargs.pop("num_patches", None)
+        image_embeds = kwargs.pop("image_embeds", None)
+
+        if pixel_values is None and image_embeds is None:
+            return None
+
+        if pixel_values is not None:
+            pixel_values = flatten_bn(pixel_values, concat=True)
+            if pixel_values.dim() >= 3:
+                pixel_values = pixel_values.view(-1, *pixel_values.shape[-3:])
+            if patch_pixel_values is not None:
+                patch_pixel_values = flatten_bn(patch_pixel_values,
+                                                concat=True)
+                patch_pixel_values = patch_pixel_values.view(
+                    -1, *patch_pixel_values.shape[-3:])
+                # Handle empty patch_pixel_values by setting to None
+                if patch_pixel_values.shape[0] == 0:
+                    patch_pixel_values = None
+            num_patches = flatten_bn(num_patches, concat=True).tolist()
+
+            return Step3VLImagePixelInputs(
+                type="pixel_values",
+                pixel_values=pixel_values.to(self.dtype).to(self.device),
+                patch_pixel_values=patch_pixel_values.to(self.dtype).to(
+                    self.device) if patch_pixel_values is not None else None,
+                num_patches=num_patches,
+            )
+
+        if image_embeds is not None:
+            if image_embeds.dim() == 2 or image_embeds.dim() >= 3:
+                image_embeds = image_embeds.view(-1, image_embeds.shape[-1])
+            else:
+                raise ValueError(
+                    f"Unexpected shape for image_embeds: {image_embeds.shape}")
+
+            return Step3VLImageEmbeddingInputs(
+                type="image_embeds",
+                image_embeds=image_embeds.to(self.dtype).to(self.device),
+            )
+        return None
+
+    def _process_image_features(self,
+                                image_features: torch.Tensor) -> torch.Tensor:
+        B, P = image_features.shape[:2]
+        HW = int(sqrt(P))
+        image_features = image_features.permute(0, 2, 1).view(B, -1, HW, HW)
+        image_features = self.vit_downsampler(image_features)
+        image_features = self.vit_downsampler2(image_features)
+        n_dim = image_features.size(1)
+        image_features = image_features.view(B, n_dim, -1).permute(0, 2, 1)
+        image_features = self.vit_large_projector(image_features)
+        return image_features
+
+    def _get_vision_model_output(self,
+                                 input_tensor: torch.Tensor) -> torch.Tensor:
+        return self.vision_model(input_tensor)[:, 4:]  # drop 4 cls-token slots
+
+    def _process_image_input(
+            self, image_input: Step3VLImageInputs) -> tuple[torch.Tensor, ...]:
+        # NOTE(review): image_embeds branch leaves num_patches etc. unbound.
+        if image_input["type"] == "image_embeds":
+            image_features = image_input["image_embeds"]
+        else:
+            image_features = self._get_vision_model_output(
+                image_input["pixel_values"])
+            patch_image_features = self._get_vision_model_output(
+                image_input["patch_pixel_values"]
+            ) if image_input["patch_pixel_values"] is not None else None
+            num_patches = image_input["num_patches"]
+
+        image_features = self._process_image_features(image_features)
+        patch_image_features = self._process_image_features(
+            patch_image_features) if patch_image_features is not None else None
+        # Per image: its patch features (if any) first, then the global image.
+        merged_image_features = []
+        cur_patch_idx = 0
+        for i, num_patch in enumerate(num_patches):
+            cur_feature = []
+            if num_patch > 0:
+                patch_slice = patch_image_features[
+                    cur_patch_idx:cur_patch_idx + num_patch]
+                cur_feature.append(patch_slice.view(-1, patch_slice.shape[-1]))
+            cur_feature.append(image_features[i].view(
+                -1, image_features.shape[-1]))
+            cur_patch_idx += num_patch
+            merged_image_features.append(
+                torch.cat(cur_feature) if len(cur_feature) >
+                1 else cur_feature[0])
+        return merged_image_features
+
+    def get_multimodal_embeddings(self, **kwargs) -> Optional[NestedTensors]:
+        image_input = self._parse_and_validate_image_input(**kwargs)
+        if image_input is None:
+            return None
+        vision_embeddings = self._process_image_input(image_input)
+        return vision_embeddings
+
+    def get_input_embeddings(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
+    ) -> torch.Tensor:
+        if multimodal_embeddings is None:
+            inputs_embeds = self.language_model.model.get_input_embeddings(
+                input_ids)
+        else:
+            is_text = input_ids != self.config.image_token_id  # non-image ids
+            text_ids = input_ids[is_text]
+            text_embeds = self.language_model.model.get_input_embeddings(
+                text_ids)
+            inputs_embeds = torch.empty(input_ids.shape[0],
+                                        text_embeds.shape[-1],
+                                        dtype=text_embeds.dtype,
+                                        device=text_embeds.device)
+            inputs_embeds[is_text] = text_embeds
+            inputs_embeds = merge_multimodal_embeddings(
+                input_ids, inputs_embeds, multimodal_embeddings,
+                self.config.image_token_id)
+        return inputs_embeds
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: Optional[IntermediateTensors] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        **kwargs: object,
+    ) -> Union[torch.Tensor, IntermediateTensors]:
+        if intermediate_tensors is not None:
+            inputs_embeds = None
+        elif inputs_embeds is None:
+            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
+            # always pass the input via `inputs_embeds`
+            # to make sure the computation graph is consistent
+            inputs_embeds = self.get_input_embeddings(input_ids,
+                                                      vision_embeddings)
+            input_ids = None
+
+        hidden_states = self.language_model(input_ids,
+                                            positions,
+                                            intermediate_tensors,
+                                            inputs_embeds=inputs_embeds)
+
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        sampling_metadata: SamplingMetadata,
+    ) -> Optional[torch.Tensor]:
+        return self.language_model.compute_logits(hidden_states,
+                                                  sampling_metadata)
+
+    def sample(
+        self,
+        logits: torch.Tensor,
+        sampling_metadata: SamplingMetadata,
+    ) -> Optional[SamplerOutput]:
+        return self.language_model.sample(logits, sampling_metadata)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
+        loader = AutoWeightsLoader(self)
+        loaded_weights = loader.load_weights(weights,
+                                             mapper=self.hf_to_vllm_mapper)
+        return loaded_weights
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
index d61e4f11dfa29..1c3f78f2edbfb 100644
--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -8,6 +8,7 @@ from .granite_reasoning_parser import GraniteReasoningParser
 from .hunyuan_a13b_reasoning_parser import HunyuanA13BReasoningParser
 from .mistral_reasoning_parser import MistralReasoningParser
 from .qwen3_reasoning_parser import Qwen3ReasoningParser
+from .step3_reasoning_parser import Step3ReasoningParser
 
 __all__ = [
     "ReasoningParser",
@@ -18,4 +19,5 @@ __all__ = [
     "Qwen3ReasoningParser",
     "Glm4MoeModelReasoningParser",
     "MistralReasoningParser",
+    "Step3ReasoningParser",
 ]
diff --git a/vllm/reasoning/step3_reasoning_parser.py b/vllm/reasoning/step3_reasoning_parser.py
new file mode 100644
index 0000000000000..f642ea977c580
--- /dev/null
+++ b/vllm/reasoning/step3_reasoning_parser.py
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Sequence
+from typing import Optional, Union
+
+import regex as re
+from transformers import PreTrainedTokenizerBase
+
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaMessage)
+from vllm.logger import init_logger
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
+
+logger = init_logger(__name__)
+
+
+@ReasoningParserManager.register_module("step3")
+class Step3ReasoningParser(ReasoningParser):
+    """
+    Reasoning parser for Step3 model.
+
+    The Step3 model uses </think> token to denote the end of reasoning
+    text. This parser extracts all content before </think> as reasoning content.
+    """
+
+    def __init__(self, tokenizer: PreTrainedTokenizerBase):
+        super().__init__(tokenizer)
+        self.think_end_token = "</think>"
+
+        self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}",
+                                          re.DOTALL)
+
+        if not self.model_tokenizer:
+            raise ValueError(
+                "The model tokenizer must be passed to the ReasoningParser "
+                "constructor during construction.")
+
+        self.think_end_token_id = self.vocab.get(self.think_end_token)
+        if self.think_end_token_id is None:
+            raise RuntimeError(
+                "Step3 reasoning parser could not locate think end "
+                "token in the tokenizer!")
+
+    def extract_reasoning_content_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> Union[DeltaMessage, None]:
+        """
+        Extract reasoning content from a delta message.
+        Handles streaming output where previous + delta = current.
+        Uses token IDs for faster processing.
+        For text "abc</think>xyz":
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+        """
+        # Skip single special token
+        if len(delta_token_ids
+               ) == 1 and delta_token_ids[0] == self.think_end_token_id:
+            return None
+
+        if self.think_end_token_id in delta_token_ids:
+            # </think> in delta: split around its first occurrence. If the
+            # text form is missing from delta_text, partition() keeps the
+            # whole delta as reasoning instead of slicing at find()'s -1.
+            reasoning_content, _, content = delta_text.partition(
+                self.think_end_token)
+            return DeltaMessage(reasoning_content=reasoning_content,
+                                content=content if content else None)
+        elif self.think_end_token_id in previous_token_ids:
+            # </think> already seen in previous text, everything is content
+            return DeltaMessage(content=delta_text)
+        else:
+            # No </think> seen yet, everything is reasoning
+            return DeltaMessage(reasoning_content=delta_text)
+
+    def extract_reasoning_content(
+            self, model_output: str, request: ChatCompletionRequest
+    ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Split a complete model output at the first </think> token.
+
+        Returns a (reasoning_content, content) tuple. Without a </think>
+        token everything is reasoning content and content is None; content
+        is also None when nothing follows the token.
+        """
+        reasoning_content, end_token, content = model_output.partition(
+            self.think_end_token)
+        if not end_token:
+            # If no </think> token, everything is reasoning content
+            return model_output, None
+        return reasoning_content, content or None
+
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        """Return True if the </think> token id appears in input_ids."""
+        return self.think_end_token_id in input_ids
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        """Return the ids after the first </think>, or [] if none follow."""
+        # A </think> seen only as the last id has no content after it yet.
+        if self.think_end_token_id not in input_ids[:-1]:
+            return []
+        else:
+            return input_ids[input_ids.index(self.think_end_token_id) + 1:]
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 4ce56cb3a6aac..fcaa48c1392a3 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -35,7 +35,8 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DeepseekVLV2Config,
                                              MllamaConfig, MLPSpeculatorConfig,
                                              Nemotron_Nano_VL_Config,
                                              NemotronConfig, NVLM_D_Config,
-                                             RWConfig, UltravoxConfig)
+                                             RWConfig, Step3TextConfig,
+                                             Step3VLConfig, UltravoxConfig)
 # yapf: enable
 from vllm.transformers_utils.configs.mistral import adapt_config_dict
 from vllm.transformers_utils.utils import check_gguf_file
@@ -83,6 +84,8 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
     "nemotron": NemotronConfig,
     "NVLM_D": NVLM_D_Config,
     "ultravox": UltravoxConfig,
+    "step3_vl": Step3VLConfig,
+    "step3_text": Step3TextConfig,
     **_CONFIG_REGISTRY_OVERRIDE_HF
 }
 
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 7c7d859e4a325..96733da726181 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -24,6 +24,9 @@ from vllm.transformers_utils.configs.nemotron import NemotronConfig
 from vllm.transformers_utils.configs.nemotron_h import NemotronHConfig
 from vllm.transformers_utils.configs.nemotron_vl import Nemotron_Nano_VL_Config
 from vllm.transformers_utils.configs.nvlm_d import NVLM_D_Config
+from vllm.transformers_utils.configs.step3_vl import (Step3TextConfig,
+                                                      Step3VisionEncoderConfig,
+                                                      Step3VLConfig)
 from vllm.transformers_utils.configs.ultravox import UltravoxConfig
 
 __all__ = [
@@ -42,4 +45,7 @@ __all__ = [
     "Nemotron_Nano_VL_Config",
     "NVLM_D_Config",
     "UltravoxConfig",
+    "Step3VLConfig",
+    "Step3VisionEncoderConfig",
+    "Step3TextConfig",
 ]
diff --git a/vllm/transformers_utils/configs/step3_vl.py b/vllm/transformers_utils/configs/step3_vl.py
new file mode 100644
index 0000000000000..fe3c72de69d28
--- /dev/null
+++ b/vllm/transformers_utils/configs/step3_vl.py
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Any, Optional, Union
+
+from transformers.configuration_utils import PretrainedConfig
+
+
+class Step3VisionEncoderConfig(PretrainedConfig):
+    model_type = "step3_vision_encoder"
+
+    def __init__(
+        self,
+        hidden_size: int = 1792,
+        intermediate_size: int = 3072,
+        output_hidden_size: int = 4096,
+        num_hidden_layers: int = 63,
+        num_attention_heads: int = 16,
+        num_channels: int = 3,
+        image_size: int = 728,
+        patch_size: int = 14,
+        hidden_act: str = "quick_gelu",
+        layer_norm_eps: float = 1e-5,
+        **kwargs,
+    ) -> None:
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.output_hidden_size = output_hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_channels = num_channels
+        self.patch_size = patch_size
+        self.image_size = image_size
+        self.layer_norm_eps = layer_norm_eps
+        self.hidden_act = hidden_act
+        super().__init__(**kwargs)
+
+
+class Step3TextConfig(PretrainedConfig):
+    """
+    Configuration for the Step3 text model (model_type "step3_text").
+    Each constructor argument is stored as an attribute of the same name;
+    any remaining keyword arguments are forwarded to `PretrainedConfig`.
+    """
+
+    model_type = "step3_text"
+    architectures = ["Step3TextForCausalLM"]
+
+    def __init__(
+        self,
+        hidden_size: int = 7168,
+        intermediate_size: int = 18432,
+        num_attention_heads: int = 64,
+        num_attention_groups: int = 1,
+        num_hidden_layers: int = 61,
+        max_seq_len: int = 65536,
+        vocab_size: int = 128815,
+        rms_norm_eps: float = 1e-5,
+        moe_intermediate_size: int = 5120,
+        moe_num_experts: int = 48,
+        moe_top_k: int = 3,
+        rope_theta: float = 500000,
+        rope_scaling: Optional[dict[str, Any]] = None,
+        max_position_embedding: int = 65536,
+        share_expert_dim: int = 5120,
+        share_q_dim: int = 2048,
+        head_dim: int = 256,
+        norm_expert_weight: bool = False,
+        moe_layers_enum: tuple[int, ...] = tuple(range(4, 60)),
+        **kwargs,
+    ) -> None:
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_attention_heads = num_attention_heads
+        self.num_attention_groups = num_attention_groups
+        self.num_hidden_layers = num_hidden_layers
+        self.max_seq_len = max_seq_len
+        self.vocab_size = vocab_size
+        self.rms_norm_eps = rms_norm_eps
+        self.moe_intermediate_size = moe_intermediate_size
+        self.moe_num_experts = moe_num_experts
+        self.moe_top_k = moe_top_k
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self.max_position_embedding = max_position_embedding
+        self.share_expert_dim = share_expert_dim
+        self.share_q_dim = share_q_dim
+        self.head_dim = head_dim
+        self.norm_expert_weight = norm_expert_weight
+        self.moe_layers_enum = moe_layers_enum
+
+        super().__init__(**kwargs)
+
+
+class Step3VLConfig(PretrainedConfig):
+    """Step3 vision-language config made of a vision and a text sub-config."""
+    model_type = "step3_vl"
+
+    def __init__(
+        self,
+        vision_config: Optional[Union[dict, Step3VisionEncoderConfig]] = None,
+        text_config: Optional[Union[dict, Step3TextConfig]] = None,
+        understand_projector_stride: int = 1,
+        projector_bias: bool = True,
+        image_token_id: int = 128001,
+        **kwargs,
+    ) -> None:
+        # A missing sub-config is treated as an empty dict (all defaults);
+        # dicts are expanded into the matching config class.
+        vision_config = {} if vision_config is None else vision_config
+        if isinstance(vision_config, dict):
+            vision_config = Step3VisionEncoderConfig(**vision_config)
+        self.vision_config = vision_config
+        text_config = {} if text_config is None else text_config
+        if isinstance(text_config, dict):
+            text_config = Step3TextConfig(**text_config)
+        self.text_config = text_config
+
+        self.understand_projector_stride = understand_projector_stride
+        self.projector_bias = projector_bias
+        self.hidden_size = text_config.hidden_size
+        self.image_token_id = image_token_id
+
+        super().__init__(**kwargs)

From 7349d5268bf70b7a530c1e649884e4f926615f8e Mon Sep 17 00:00:00 2001
From: Zhengxu Chen <zhxchen17@outlook.com>
Date: Thu, 31 Jul 2025 12:46:07 -0400
Subject: [PATCH 092/224] [ez] Remove a trailing space from
 compilation/decorators.py (#22028)

---
 vllm/compilation/decorators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
index f3592324d8cfa..1370862d580a5 100644
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -108,7 +108,7 @@ def support_torch_compile(
     During runtime, when we actually mark dimensions of tensors,
      it depends on the value of arguments:
 
-    - if it is a single integer (can be negative), the corresponding dimension 
+    - if it is a single integer (can be negative), the corresponding dimension
         of the argument will be marked as dynamic.
     - if it is `None`, ignored.
     - if it is `IntermediateTensors`, all the tensors in the intermediate

From 58bb902186a87007deeeef2d2af02ed2b13bb182 Mon Sep 17 00:00:00 2001
From: Doug Smith <dosmith@redhat.com>
Date: Thu, 31 Jul 2025 12:52:48 -0400
Subject: [PATCH 093/224] fix(setup): improve precompiled wheel setup for
 Docker builds (#22025)

Signed-off-by: dougbtv <dosmith@redhat.com>
---
 docker/Dockerfile     |   1 +
 requirements/test.txt |  24 +++--
 setup.py              | 203 ++++++++++++++++++------------------------
 3 files changed, 104 insertions(+), 124 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 43522ef8fb8dd..69aeee67a4300 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -370,6 +370,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     fi
 
 # Install vllm wheel first, so that torch etc will be installed.
+# The wheel is bind-mounted from the build stage's /workspace/dist.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system dist/*.whl --verbose \
diff --git a/requirements/test.txt b/requirements/test.txt
index d45048aae5809..4aaca2afea266 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -22,9 +22,7 @@ aiohttp==3.10.11
 aiohttp-cors==0.8.1
     # via ray
 aiosignal==1.3.1
-    # via
-    #   aiohttp
-    #   ray
+    # via aiohttp
 albucore==0.0.16
     # via terratorch
 albumentations==1.4.6
@@ -139,7 +137,7 @@ contourpy==1.3.0
     # via matplotlib
 cramjam==2.9.0
     # via fastparquet
-cupy-cuda12x==13.3.0
+cupy-cuda12x==13.5.1
     # via ray
 cycler==0.12.1
     # via matplotlib
@@ -226,7 +224,6 @@ frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
-    #   ray
 fsspec==2024.9.0
     # via
     #   datasets
@@ -603,10 +600,18 @@ opencv-python-headless==4.11.0.86
 opentelemetry-api==1.35.0
     # via
     #   mlflow-skinny
+    #   opentelemetry-exporter-prometheus
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
+opentelemetry-exporter-prometheus==0.56b0
+    # via ray
+opentelemetry-proto==1.36.0
+    # via ray
 opentelemetry-sdk==1.35.0
-    # via mlflow-skinny
+    # via
+    #   mlflow-skinny
+    #   opentelemetry-exporter-prometheus
+    #   ray
 opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
 packaging==24.2
@@ -697,7 +702,9 @@ pqdm==0.2.0
 pretrainedmodels==0.7.4
     # via segmentation-models-pytorch
 prometheus-client==0.22.0
-    # via ray
+    # via
+    #   opentelemetry-exporter-prometheus
+    #   ray
 propcache==0.2.0
     # via yarl
 proto-plus==1.26.1
@@ -707,6 +714,7 @@ protobuf==5.28.3
     #   google-api-core
     #   googleapis-common-protos
     #   mlflow-skinny
+    #   opentelemetry-proto
     #   proto-plus
     #   ray
     #   tensorboardx
@@ -854,7 +862,7 @@ rasterio==1.4.3
     #   rioxarray
     #   terratorch
     #   torchgeo
-ray==2.43.0
+ray==2.48.0
     # via -r requirements/test.in
 redis==5.2.0
     # via tensorizer
diff --git a/setup.py b/setup.py
index bf3391e2db19e..6d615d122d69e 100644
--- a/setup.py
+++ b/setup.py
@@ -282,10 +282,69 @@ class cmake_build_ext(build_ext):
             self.copy_file(file, dst_file)
 
 
-class repackage_wheel(build_ext):
+class precompiled_wheel_utils:
     """Extracts libraries and other files from an existing wheel."""
 
-    def get_base_commit_in_main_branch(self) -> str:
+    @staticmethod
+    def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
+        """Copy precompiled files from a wheel into the current directory.
+
+        A non-file `wheel_url_or_path` is downloaded to a temporary directory
+        first. Returns a dict of dotted package name -> extracted file names.
+        """
+        import tempfile
+        import zipfile
+
+        files_to_copy = [
+            "vllm/_C.abi3.so",
+            "vllm/_moe_C.abi3.so",
+            "vllm/_flashmla_C.abi3.so",
+            "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
+            "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
+            "vllm/cumem_allocator.abi3.so",
+        ]
+        compiled_regex = re.compile(
+            r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
+
+        temp_dir = None
+        try:
+            if os.path.isfile(wheel_url_or_path):
+                wheel_path = wheel_url_or_path
+                print(f"Using existing wheel at {wheel_path}")
+            else:
+                wheel_filename = wheel_url_or_path.split("/")[-1]
+                temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
+                wheel_path = os.path.join(temp_dir, wheel_filename)
+                print(f"Downloading wheel from {wheel_url_or_path} "
+                      f"to {wheel_path}")
+                from urllib.request import urlretrieve
+                urlretrieve(wheel_url_or_path, filename=wheel_path)
+
+            package_data_patch = {}
+            with zipfile.ZipFile(wheel_path) as wheel:
+                members = wheel.filelist
+                # Explicitly listed files first, then compiled_regex matches.
+                file_members = [
+                    m for m in members if m.filename in files_to_copy
+                ] + [m for m in members if compiled_regex.match(m.filename)]
+                for file in file_members:
+                    print(f"[extract] {file.filename}")
+                    target_path = os.path.join(".", file.filename)
+                    os.makedirs(os.path.dirname(target_path), exist_ok=True)
+                    with wheel.open(file.filename) as src, open(
+                            target_path, "wb") as dst:
+                        shutil.copyfileobj(src, dst)
+                    pkg_dir, file_name = os.path.split(file.filename)
+                    package_data_patch.setdefault(
+                        pkg_dir.replace("/", "."), []).append(file_name)
+            return package_data_patch
+        finally:
+            if temp_dir is not None:
+                print(f"Removing temporary directory {temp_dir}")
+                shutil.rmtree(temp_dir)
+
+    @staticmethod
+    def get_base_commit_in_main_branch() -> str:
         # Force to use the nightly wheel. This is mainly used for CI testing.
         if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
             return "nightly"
@@ -334,115 +393,6 @@ class repackage_wheel(build_ext):
                 "wheel may not be compatible with your dev branch: %s", err)
             return "nightly"
 
-    def run(self) -> None:
-        assert _is_cuda(
-        ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
-
-        wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
-        if wheel_location is None:
-            base_commit = self.get_base_commit_in_main_branch()
-            wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-            # Fallback to nightly wheel if latest commit wheel is unavailable,
-            # in this rare case, the nightly release CI hasn't finished on main.
-            if not is_url_available(wheel_location):
-                wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-
-        import zipfile
-
-        if os.path.isfile(wheel_location):
-            wheel_path = wheel_location
-            print(f"Using existing wheel={wheel_path}")
-        else:
-            # Download the wheel from a given URL, assume
-            # the filename is the last part of the URL
-            wheel_filename = wheel_location.split("/")[-1]
-
-            import tempfile
-
-            # create a temporary directory to store the wheel
-            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
-            wheel_path = os.path.join(temp_dir, wheel_filename)
-            print(f"Downloading wheel from {wheel_location} to {wheel_path}")
-            from urllib.request import urlretrieve
-            try:
-                urlretrieve(wheel_location, filename=wheel_path)
-            except Exception as e:
-                from setuptools.errors import SetupError
-                raise SetupError(
-                    f"Failed to get vLLM wheel from {wheel_location}") from e
-
-        # Set the dist_dir for Docker build context
-        dist_dir = ("/workspace/dist"
-                    if envs.VLLM_DOCKER_BUILD_CONTEXT else "dist")
-        os.makedirs(dist_dir, exist_ok=True)
-
-        # Extract only necessary compiled .so files from precompiled wheel
-        with zipfile.ZipFile(wheel_path) as wheel:
-            # Get version from METADATA (optional, mostly useful for logging)
-            metadata_file = next((n for n in wheel.namelist()
-                                  if n.endswith(".dist-info/METADATA")), None)
-            if not metadata_file:
-                raise RuntimeError(
-                    "Could not find METADATA in precompiled wheel.")
-            metadata = wheel.read(metadata_file).decode()
-            version_line = next((line for line in metadata.splitlines()
-                                 if line.startswith("Version: ")), None)
-            if not version_line:
-                raise RuntimeError(
-                    "Could not determine version from METADATA.")
-            version = version_line.split(": ")[1].strip()
-
-            print(f"Extracting precompiled kernels from vLLM wheel version: "
-                  f"{version}")
-
-            # List of compiled shared objects to extract
-            files_to_copy = [
-                "vllm/_C.abi3.so",
-                "vllm/_moe_C.abi3.so",
-                "vllm/_flashmla_C.abi3.so",
-                "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
-                "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
-                "vllm/cumem_allocator.abi3.so",
-            ]
-
-            file_members = list(
-                filter(lambda x: x.filename in files_to_copy, wheel.filelist))
-            compiled_regex = re.compile(
-                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
-            file_members += list(
-                filter(lambda x: compiled_regex.match(x.filename),
-                       wheel.filelist))
-
-            for file in file_members:
-                print(f"Extracting and including {file.filename} "
-                      "from existing wheel")
-                package_name = os.path.dirname(file.filename).replace("/", ".")
-                file_name = os.path.basename(file.filename)
-
-                if package_name not in package_data:
-                    package_data[package_name] = []
-
-                output_base = (dist_dir
-                               if envs.VLLM_DOCKER_BUILD_CONTEXT else ".")
-                target_path = os.path.join(output_base, file.filename)
-                os.makedirs(os.path.dirname(target_path), exist_ok=True)
-                with wheel.open(file.filename) as src, open(target_path,
-                                                            "wb") as dst:
-                    shutil.copyfileobj(src, dst)
-
-                package_data[package_name].append(file_name)
-
-        # Copy wheel into dist dir for Docker to consume (e.g., via --mount)
-        if envs.VLLM_DOCKER_BUILD_CONTEXT:
-            arch_tag = "cp38-abi3-manylinux1_x86_64"
-            corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
-            final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
-
-            print(
-                "Docker build context detected, copying precompiled wheel to "
-                f"{final_wheel_path}")
-            shutil.copy2(wheel_path, final_wheel_path)
-
 
 def _no_device() -> bool:
     return VLLM_TARGET_DEVICE == "empty"
@@ -676,16 +626,37 @@ package_data = {
     ]
 }
 
+# With VLLM_USE_PRECOMPILED set, extract the precompiled files from an
+# existing wheel and add them to package_data before setup() is called.
+if envs.VLLM_USE_PRECOMPILED:
+    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
+    wheel_url = wheel_location
+    if wheel_location is None:
+        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
+        wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+        from urllib.request import urlopen
+        try:
+            with urlopen(wheel_url) as resp:
+                if resp.status != 200:
+                    wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+        except Exception as e:
+            print(f"[warn] Falling back to nightly wheel: {e}")
+            wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+
+    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
+        wheel_url)
+    for pkg, files in patch.items():
+        package_data.setdefault(pkg, []).extend(files)
+
 if _no_device():
     ext_modules = []
 
-if not ext_modules:
+if not ext_modules or envs.VLLM_USE_PRECOMPILED:
+    # Disable build_ext when using precompiled wheel
     cmdclass = {}
 else:
-    cmdclass = {
-        "build_ext":
-        repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
-    }
+    cmdclass = {"build_ext": cmake_build_ext}
 
 setup(
     # static metadata should rather go in pyproject.toml

From 0780bb57835dcd9ee666aaf807c37086de67422b Mon Sep 17 00:00:00 2001
From: Alexei-V-Ivanov-AMD
 <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com>
Date: Thu, 31 Jul 2025 11:53:27 -0500
Subject: [PATCH 094/224] Removing amdproduction Tests (#22027)

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-pipeline.yaml | 46 +++++++++++++++++------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 2bf0b6fd9a169..a7fe200559305 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -82,7 +82,7 @@ steps:
   - bash standalone_tests/python_only_compile.sh
 
 - label: Basic Correctness Test # 30min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   fast_check: true
   torch_nightly: true
   source_file_dependencies:
@@ -99,7 +99,7 @@ steps:
   - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
 
 - label: Chunked Prefill Test
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
   - tests/basic_correctness/test_chunked_prefill
@@ -108,7 +108,7 @@ steps:
   - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
 
 - label: Core Test # 10min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   fast_check: true
   source_file_dependencies:
   - vllm/core
@@ -209,7 +209,7 @@ steps:
   - pytest -v -s distributed/test_eplb_execute.py
 
 - label: Metrics, Tracing Test # 10min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   num_gpus: 2
   source_file_dependencies:
   - vllm/
@@ -228,7 +228,7 @@ steps:
 #####  1 GPU test  #####
 
 - label: Regression Test # 5min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
   - tests/test_regression
@@ -280,7 +280,7 @@ steps:
     - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
 
 - label: Examples Test # 25min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/examples"
   source_file_dependencies:
   - vllm/entrypoints
@@ -305,7 +305,7 @@ steps:
     - VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
 
 - label: Prefix Caching Test # 9min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
   - tests/prefix_caching
@@ -314,7 +314,7 @@ steps:
 
 
 - label: Platform Tests (CUDA)
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
   - tests/cuda
@@ -355,7 +355,7 @@ steps:
     - pytest -v -s compile/test_async_tp.py
 
 - label: PyTorch Fullgraph Smoke Test # 9min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   torch_nightly: true
   source_file_dependencies:
   - vllm/
@@ -368,7 +368,7 @@ steps:
   - pytest -v -s compile/piecewise/test_full_cudagraph.py
 
 - label: PyTorch Fullgraph Test # 18min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   torch_nightly: true
   source_file_dependencies:
   - vllm/
@@ -377,7 +377,7 @@ steps:
   - pytest -v -s compile/test_full_graph.py
 
 - label: Kernels Core Operation Test
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - csrc/
   - tests/kernels/core
@@ -416,7 +416,7 @@ steps:
   parallelism: 2
 
 - label: Kernels Mamba Test
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - csrc/mamba/
   - tests/kernels/mamba
@@ -424,7 +424,7 @@ steps:
     - pytest -v -s kernels/mamba
 
 - label: Tensorizer Test # 11min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   soft_fail: true
   source_file_dependencies:
   - vllm/model_executor/model_loader
@@ -437,7 +437,7 @@ steps:
     - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
 
 - label: Model Executor Test
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/model_executor
   - tests/model_executor
@@ -447,7 +447,7 @@ steps:
     - pytest -v -s model_executor
 
 - label: Benchmarks # 9min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/.buildkite"
   source_file_dependencies:
   - benchmarks/
@@ -455,7 +455,7 @@ steps:
   - bash scripts/run-benchmarks.sh
 
 - label: Benchmarks CLI Test # 10min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
   - tests/benchmarks/
@@ -494,7 +494,7 @@ steps:
   - pytest -s entrypoints/openai/correctness/
 
 - label: Encoder Decoder tests # 5min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
   - tests/encoder_decoder
@@ -502,7 +502,7 @@ steps:
     - pytest -v -s encoder_decoder
 
 - label: OpenAI-Compatible Tool Use # 20 min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   fast_check: false
   source_file_dependencies:
     - vllm/
@@ -623,7 +623,7 @@ steps:
 
 # This test is used only in PR development phase to test individual models and should never run on main
 - label: Custom Models Test
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   optional: true
   commands:
     - echo 'Testing custom models...'
@@ -658,7 +658,7 @@ steps:
 #####  multi gpus test  #####
 
 - label: Distributed Comm Ops Test # 7min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 2
   source_file_dependencies:
@@ -755,7 +755,7 @@ steps:
   - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
 
 - label: Multi-step Tests (4 GPUs) # 36min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 4
   source_file_dependencies:
@@ -776,7 +776,7 @@ steps:
   - pytest -v -s multi_step/test_correctness_llm.py
 
 - label: Pipeline Parallelism Test # 45min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 4
   source_file_dependencies:
@@ -790,7 +790,7 @@ steps:
   - pytest -v -s distributed/test_pipeline_parallel.py
 
 - label: LoRA TP Test (Distributed)
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   num_gpus: 4
   source_file_dependencies:
   - vllm/lora

From 53c21e492e0acd140a9984c8ec7cc3a7123efee5 Mon Sep 17 00:00:00 2001
From: XiongfeiWei <isaacwxf23@gmail.com>
Date: Thu, 31 Jul 2025 10:26:43 -0700
Subject: [PATCH 095/224] Update torch_xla pin to 20250730 (#21956)

Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com>
---
 docker/Dockerfile.tpu | 2 +-
 requirements/tpu.txt  | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docker/Dockerfile.tpu b/docker/Dockerfile.tpu
index b9fc9def88190..2190151369761 100644
--- a/docker/Dockerfile.tpu
+++ b/docker/Dockerfile.tpu
@@ -1,4 +1,4 @@
-ARG NIGHTLY_DATE="20250724"
+ARG NIGHTLY_DATE="20250730"
 ARG BASE_IMAGE="us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.12_tpuvm_$NIGHTLY_DATE"
 
 FROM $BASE_IMAGE
diff --git a/requirements/tpu.txt b/requirements/tpu.txt
index 2d0d8bd8457e3..7bb77c4a99636 100644
--- a/requirements/tpu.txt
+++ b/requirements/tpu.txt
@@ -19,8 +19,8 @@ nixl==0.3.0
 --find-links https://storage.googleapis.com/libtpu-releases/index.html
 --find-links https://storage.googleapis.com/jax-releases/jax_nightly_releases.html
 --find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
-torch==2.9.0.dev20250724
-torchvision==0.24.0.dev20250724
-torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250724-cp311-cp311-linux_x86_64.whl ; python_version == "3.11"
-torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250724-cp312-cp312-linux_x86_64.whl ; python_version == "3.12"
+torch==2.9.0.dev20250730
+torchvision==0.24.0.dev20250730
+torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250730-cp311-cp311-linux_x86_64.whl ; python_version == "3.11"
+torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250730-cp312-cp312-linux_x86_64.whl ; python_version == "3.12"
 

From 9e0726e5bfd201fa2c9209e3997d24c72ecc3b13 Mon Sep 17 00:00:00 2001
From: zhiweiz <morgendave@gmail.com>
Date: Thu, 31 Jul 2025 10:35:07 -0700
Subject: [PATCH 096/224] [Meta] Official Eagle mm support, first enablement on
 llama4 (#20788)

Signed-off-by: morgendave <morgendave@gmail.com>
Co-authored-by: Roger Wang <hey@rogerw.me>
---
 examples/offline_inference/spec_decode.py  | 64 ++++++++++++++++++++--
 tests/v1/e2e/test_spec_decode.py           | 61 +++++++++++++++------
 vllm/model_executor/models/llama4.py       |  1 +
 vllm/model_executor/models/llama4_eagle.py | 35 ++++++++++--
 vllm/model_executor/models/llama_eagle.py  |  6 ++
 vllm/model_executor/models/llama_eagle3.py |  5 ++
 vllm/v1/spec_decode/eagle.py               | 59 +++++++++++++++++---
 vllm/v1/worker/gpu_model_runner.py         | 10 +++-
 8 files changed, 205 insertions(+), 36 deletions(-)

diff --git a/examples/offline_inference/spec_decode.py b/examples/offline_inference/spec_decode.py
index ce735f3b27dfe..184c30891eca7 100644
--- a/examples/offline_inference/spec_decode.py
+++ b/examples/offline_inference/spec_decode.py
@@ -13,6 +13,38 @@ except ImportError:
     from argparse import ArgumentParser as FlexibleArgumentParser
 
 
+QUESTION = "What is the content of each image?"
+IMAGE_URLS = [
+    "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/2/26/Ultramarine_Flycatcher_%28Ficedula_superciliaris%29_Naggar%2C_Himachal_Pradesh%2C_2013_%28cropped%29.JPG",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e5/Anim1754_-_Flickr_-_NOAA_Photo_Library_%281%29.jpg/2560px-Anim1754_-_Flickr_-_NOAA_Photo_Library_%281%29.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/d/d4/Starfish%2C_Caswell_Bay_-_geograph.org.uk_-_409413.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/6/69/Grapevinesnail_01.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0b/Texas_invasive_Musk_Thistle_1.jpg/1920px-Texas_invasive_Musk_Thistle_1.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/7/7a/Huskiesatrest.jpg/2880px-Huskiesatrest.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg/1920px-Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/3/30/George_the_amazing_guinea_pig.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/1/1f/Oryctolagus_cuniculus_Rcdo.jpg/1920px-Oryctolagus_cuniculus_Rcdo.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/9/98/Horse-and-pony.jpg",
+]
+
+
+def get_custom_mm_prompts(num_prompts):
+    prompts = []
+    for url in IMAGE_URLS:
+        prompts.append(
+            [
+                {"type": "image_url", "image_url": {"url": url}},
+                {"type": "text", "text": QUESTION},
+            ]
+        )
+    if num_prompts > len(IMAGE_URLS):
+        prompts = prompts * (num_prompts // len(IMAGE_URLS) + 1)
+
+    return [[{"role": "user", "content": prompt}] for prompt in prompts[:num_prompts]]
+
+
 def parse_args():
     parser = FlexibleArgumentParser()
     add_dataset_parser(parser)
@@ -35,6 +67,7 @@ def parse_args():
     parser.add_argument("--output-len", type=int, default=256)
     parser.add_argument("--model-dir", type=str, default=None)
     parser.add_argument("--eagle-dir", type=str, default=None)
+    parser.add_argument("--custom-mm-prompts", action="store_true")
     return parser.parse_args()
 
 
@@ -44,14 +77,26 @@ def main():
 
     model_dir = args.model_dir
     if args.model_dir is None:
+        if args.custom_mm_prompts:
+            raise ValueError(
+                "custom_mm_prompts requires mm based models; "
+                "default llama3.1-8b-instruct is not mm based, "
+                "please specify model_dir to give a mm based model"
+            )
         model_dir = "meta-llama/Llama-3.1-8B-Instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_dir)
+    args.custom_skip_chat_template = True
 
-    prompts = get_samples(args, tokenizer)
-    # add_special_tokens is False to avoid adding bos twice when using chat templates
-    prompt_ids = [
-        tokenizer.encode(prompt.prompt, add_special_tokens=False) for prompt in prompts
-    ]
+    if not args.custom_mm_prompts:
+        prompts = get_samples(args, tokenizer)
+        # add_special_tokens is False to avoid adding bos twice
+        # when using chat templates
+        prompt_ids = [
+            tokenizer.encode(prompt.prompt, add_special_tokens=False)
+            for prompt in prompts
+        ]
+    else:
+        prompts = get_custom_mm_prompts(args.num_prompts)
 
     if args.method == "eagle" or args.method == "eagle3":
         eagle_dir = args.eagle_dir
@@ -85,10 +130,17 @@ def main():
         speculative_config=speculative_config,
         disable_log_stats=False,
         max_model_len=16384,
+        limit_mm_per_prompt={"image": 5},
+        disable_chunked_mm_input=True,
     )
 
     sampling_params = SamplingParams(temperature=args.temp, max_tokens=args.output_len)
-    outputs = llm.generate(prompt_token_ids=prompt_ids, sampling_params=sampling_params)
+    if not args.custom_mm_prompts:
+        outputs = llm.generate(
+            prompt_token_ids=prompt_ids, sampling_params=sampling_params
+        )
+    else:
+        outputs = llm.chat(prompts, sampling_params=sampling_params)
 
     # print the generated text
     if args.print_output:
diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py
index 2423f966acfab..31f25e94c5b4b 100644
--- a/tests/v1/e2e/test_spec_decode.py
+++ b/tests/v1/e2e/test_spec_decode.py
@@ -3,29 +3,34 @@
 from __future__ import annotations
 
 import random
-from typing import Any
+from typing import Any, Union
 
 import pytest
 import torch
 
 from vllm import LLM, SamplingParams
+from vllm.assets.base import VLLM_S3_BUCKET_URL
+from vllm.assets.image import VLM_IMAGES_DIR
 from vllm.distributed import cleanup_dist_env_and_memory
 
 
-@pytest.fixture
-def test_prompts():
+def get_test_prompts(mm_enabled: bool):
     prompt_types = ["repeat", "sentence"]
+    if mm_enabled:
+        prompt_types.append("mm")
     num_prompts = 100
     prompts = []
 
     random.seed(0)
     random_prompt_type_choices = random.choices(prompt_types, k=num_prompts)
+    print(f"Prompt types: {random_prompt_type_choices}")
 
     # Generate a mixed batch of prompts, some of which can be easily
     # predicted by n-gram matching and some which likely cannot.
     for kind in random_prompt_type_choices:
         word_choices = ["test", "temp", "hello", "where"]
         word = random.choice(word_choices)
+        prompt: Union[str, list[dict[str, Any]]] = ""
         if kind == "repeat":
             prompt = f"""
             please repeat the word '{word}' 10 times.
@@ -38,6 +43,21 @@ def test_prompts():
             uses the word {word} at least once.
             give no other output than that simple sentence without quotes.
             """
+        elif kind == "mm":
+            placeholders = [{
+                "type": "image_url",
+                "image_url": {
+                    "url":
+                    f"{VLLM_S3_BUCKET_URL}/{VLM_IMAGES_DIR}/stop_sign.jpg"
+                },
+            }]
+            prompt = [
+                *placeholders,
+                {
+                    "type": "text",
+                    "text": "The meaning of the image is"
+                },
+            ]
         else:
             raise ValueError(f"Unknown prompt type: {kind}")
         prompts.append([{"role": "user", "content": prompt}])
@@ -57,7 +77,6 @@ def model_name():
 
 def test_ngram_correctness(
     monkeypatch: pytest.MonkeyPatch,
-    test_prompts: list[list[dict[str, Any]]],
     sampling_config: SamplingParams,
     model_name: str,
 ):
@@ -67,6 +86,7 @@ def test_ngram_correctness(
     '''
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "1")
+        test_prompts = get_test_prompts(mm_enabled=False)
 
         ref_llm = LLM(model=model_name, max_model_len=1024)
         ref_outputs = ref_llm.chat(test_prompts, sampling_config)
@@ -103,23 +123,32 @@ def test_ngram_correctness(
         cleanup_dist_env_and_memory()
 
 
-@pytest.mark.parametrize("model_setup", [
-    ("eagle", "meta-llama/Llama-3.1-8B-Instruct",
-     "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1),
-    ("eagle3", "meta-llama/Llama-3.1-8B-Instruct",
-     "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", 1),
-    pytest.param(
-        ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-         "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
-        marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
-],
-                         ids=["llama3_eagle", "llama3_eagle3", "llama4_eagle"])
+@pytest.mark.parametrize(
+    ["model_setup", "mm_enabled"], [
+        (("eagle", "meta-llama/Llama-3.1-8B-Instruct",
+          "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1), False),
+        (("eagle3", "meta-llama/Llama-3.1-8B-Instruct",
+          "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", 1), False),
+        pytest.param(
+            ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+             "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
+            False,
+            marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
+        pytest.param(
+            ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+             "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
+            True,
+            marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
+    ],
+    ids=["llama3_eagle", "llama3_eagle3", "llama4_eagle", "llama4_eagle_mm"])
 def test_eagle_correctness(
     monkeypatch: pytest.MonkeyPatch,
-    test_prompts: list[list[dict[str, Any]]],
     sampling_config: SamplingParams,
     model_setup: tuple[str, str, str, int],
+    mm_enabled: bool,
 ):
+    # Generate test prompts inside the function instead of using fixture
+    test_prompts = get_test_prompts(mm_enabled)
     '''
     Compare the outputs of a original LLM and a speculative LLM
     should be the same when using eagle speculative decoding.
diff --git a/vllm/model_executor/models/llama4.py b/vllm/model_executor/models/llama4.py
index 470e701d98013..60098209c39ac 100644
--- a/vllm/model_executor/models/llama4.py
+++ b/vllm/model_executor/models/llama4.py
@@ -256,6 +256,7 @@ class Llama4DecoderLayer(nn.Module):
         super().__init__()
 
         self.layer_idx = extract_layer_index(prefix)
+        self.global_layer = config.no_rope_layers[self.layer_idx] == 0
         self.hidden_size = config.hidden_size
         rope_theta = config.rope_theta
         rope_scaling = config.rope_scaling
diff --git a/vllm/model_executor/models/llama4_eagle.py b/vllm/model_executor/models/llama4_eagle.py
index 222ab5dfaee4a..ece490ff2f2a8 100644
--- a/vllm/model_executor/models/llama4_eagle.py
+++ b/vllm/model_executor/models/llama4_eagle.py
@@ -37,8 +37,9 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.llama4 import (Llama4DecoderLayer,
                                                Llama4ForCausalLM)
 from vllm.model_executor.models.utils import extract_layer_index
+from vllm.multimodal.inputs import NestedTensors
 
-from .utils import AutoWeightsLoader, maybe_prefix
+from .utils import AutoWeightsLoader, maybe_prefix, merge_multimodal_embeddings
 
 logger = init_logger(__name__)
 
@@ -78,15 +79,23 @@ class LlamaModel(nn.Module):
         self.norm = RMSNorm(self.config.hidden_size,
                             eps=self.config.rms_norm_eps)
 
+    def get_input_embeddings(
+        self,
+        input_ids: torch.Tensor,
+    ) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
     def forward(
         self,
         input_ids: Optional[torch.Tensor],
         positions: torch.Tensor,
         hidden_states: torch.Tensor,
+        inputs_embeds: Optional[torch.Tensor] = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        input_embeds = self.embed_tokens(input_ids)
+        if inputs_embeds is None:
+            inputs_embeds = self.get_input_embeddings(input_ids)
         hidden_states = self.fc(
-            torch.cat((input_embeds, hidden_states), dim=-1))
+            torch.cat((inputs_embeds, hidden_states), dim=-1))
         residual = None
         for layer in self.layers:
             hidden_states, residual = layer(
@@ -190,8 +199,9 @@ class EagleLlama4ForCausalLM(Llama4ForCausalLM):
         input_ids: torch.Tensor,
         positions: torch.Tensor,
         hidden_states: torch.Tensor,
+        inputs_embeds: Optional[torch.Tensor] = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        return self.model(input_ids, positions, hidden_states)
+        return self.model(input_ids, positions, hidden_states, inputs_embeds)
 
     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> None:
@@ -212,3 +222,20 @@ class EagleLlama4ForCausalLM(Llama4ForCausalLM):
             model_weights[name] = loaded_weight
 
         loader.load_weights(model_weights.items())
+
+    def get_input_embeddings(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: Optional[NestedTensors] = None,
+    ) -> torch.Tensor:
+        inputs_embeds = self.model.get_input_embeddings(input_ids)
+
+        if multimodal_embeddings is not None:
+            inputs_embeds = merge_multimodal_embeddings(
+                input_ids,
+                inputs_embeds,
+                multimodal_embeddings,
+                self.config.image_token_index,
+            )
+
+        return inputs_embeds
diff --git a/vllm/model_executor/models/llama_eagle.py b/vllm/model_executor/models/llama_eagle.py
index c7690604c1d09..a4933b77e3a53 100644
--- a/vllm/model_executor/models/llama_eagle.py
+++ b/vllm/model_executor/models/llama_eagle.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from collections.abc import Iterable
+from typing import Optional
 
 import torch
 import torch.nn as nn
@@ -148,7 +149,12 @@ class EagleLlamaForCausalLM(LlamaForCausalLM):
         input_ids: torch.Tensor,
         positions: torch.Tensor,
         hidden_states: torch.Tensor,
+        inputs_embeds: Optional[torch.Tensor] = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
+        if inputs_embeds is not None:
+            raise NotImplementedError(
+                f"{type(self).__name__} does not support multimodal inputs yet."
+            )
         return self.model(input_ids, positions, hidden_states)
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py
index 7fc9fe2ebb6f6..71275f0d58579 100644
--- a/vllm/model_executor/models/llama_eagle3.py
+++ b/vllm/model_executor/models/llama_eagle3.py
@@ -202,7 +202,12 @@ class Eagle3LlamaForCausalLM(LlamaForCausalLM):
         input_ids: torch.Tensor,
         positions: torch.Tensor,
         hidden_states: torch.Tensor,
+        inputs_embeds: Optional[torch.Tensor] = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
+        if inputs_embeds is not None:
+            raise NotImplementedError(
+                f"{type(self).__name__} does not support multimodal inputs yet."
+            )
         return self.model(input_ids, positions, hidden_states)
 
     def compute_logits(
diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 63f6fc276189d..302126dbe3d5f 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Optional
+
 import numpy as np
 import torch
 import torch.nn as nn
@@ -51,6 +53,9 @@ class EagleProposer:
         # hidden size (e.g., Llama 3.3 70B).
         self.hidden_size = self.draft_model_config.get_hidden_size()
 
+        self.is_multimodal_model = vllm_config.model_config \
+            .is_multimodal_model
+
         self.use_cuda_graph = (self.vllm_config.compilation_config.level
                                == CompilationLevel.PIECEWISE and
                                not self.vllm_config.model_config.enforce_eager)
@@ -76,6 +81,11 @@ class EagleProposer:
                                    device=device,
                                    dtype=torch.int32)
 
+        self.inputs_embeds = torch.zeros(
+            (self.max_num_tokens, self.hidden_size),
+            dtype=self.dtype,
+            device=device)
+
     def propose(
         self,
         # [num_tokens]
@@ -88,6 +98,7 @@ class EagleProposer:
         next_token_ids: torch.Tensor,
         common_attn_metadata: CommonAttentionMetadata,
         sampling_metadata: SamplingMetadata,
+        mm_embeds: Optional[list[torch.Tensor]] = None,
     ) -> torch.Tensor:
         num_tokens = target_token_ids.shape[0]
         batch_size = next_token_ids.shape[0]
@@ -128,14 +139,27 @@ class EagleProposer:
         # copy inputs to buffer for cudagraph
         self.positions[:num_tokens] = target_positions
         self.hidden_states[:num_tokens] = target_hidden_states
+        if self.is_multimodal_model:
+            input_ids = self.input_ids[:num_tokens]
+            inputs_embeds = self.model.get_input_embeddings(
+                input_ids,
+                multimodal_embeddings=mm_embeds or None,
+            )
+            self.inputs_embeds[:num_tokens] = inputs_embeds
+            inputs_embeds = self.inputs_embeds[:num_input_tokens]
+            input_ids = None
+        else:
+            inputs_embeds = None
+            input_ids = self.input_ids[:num_input_tokens]
 
         with set_forward_context(per_layer_attn_metadata,
                                  self.vllm_config,
                                  num_tokens=num_input_tokens):
             ret_hidden_states = self.model(
-                self.input_ids[:num_input_tokens],
-                self.positions[:num_input_tokens],
-                self.hidden_states[:num_input_tokens],
+                input_ids=input_ids,
+                positions=self.positions[:num_input_tokens],
+                hidden_states=self.hidden_states[:num_input_tokens],
+                inputs_embeds=inputs_embeds,
             )
             if self.method == "deepseek_mtp":
                 last_hidden_states = ret_hidden_states
@@ -218,15 +242,24 @@ class EagleProposer:
             self.input_ids[:batch_size] = input_ids
             self.positions[:batch_size] = clamped_positions
             self.hidden_states[:batch_size] = hidden_states
+            if self.is_multimodal_model:
+                inputs_embeds = self.model.get_input_embeddings(input_ids)
+                self.inputs_embeds[:batch_size] = inputs_embeds
+                inputs_embeds = self.inputs_embeds[:input_batch_size]
+                input_ids = None
+            else:
+                inputs_embeds = None
+                input_ids = self.input_ids[:input_batch_size]
 
             # Run the model.
             with set_forward_context(per_layer_attn_metadata,
                                      self.vllm_config,
                                      num_tokens=input_batch_size):
                 last_hidden_states, hidden_states = self.model(
-                    self.input_ids[:input_batch_size],
-                    self.positions[:input_batch_size],
-                    self.hidden_states[:input_batch_size],
+                    input_ids=input_ids,
+                    positions=self.positions[:input_batch_size],
+                    hidden_states=self.hidden_states[:input_batch_size],
+                    inputs_embeds=inputs_embeds,
                 )
             hidden_states = hidden_states[:batch_size]
             logits = self.model.compute_logits(last_hidden_states[:batch_size],
@@ -391,10 +424,18 @@ class EagleProposer:
     ) -> None:
         with set_forward_context(None, self.vllm_config,
                                  num_tokens=num_tokens):
+            if self.is_multimodal_model:
+                input_ids = None
+                inputs_embeds = self.inputs_embeds[:num_tokens]
+            else:
+                input_ids = self.input_ids[:num_tokens]
+                inputs_embeds = None
+
             self.model(
-                self.input_ids[:num_tokens],
-                self.positions[:num_tokens],
-                self.hidden_states[:num_tokens],
+                input_ids=input_ids,
+                positions=self.positions[:num_tokens],
+                hidden_states=self.hidden_states[:num_tokens],
+                inputs_embeds=inputs_embeds,
             )
 
     def validate_same_kv_cache_group(self,
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 987ef22a1b7fb..29cda4d837bf3 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1205,13 +1205,15 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
     def _gather_mm_embeddings(
         self,
         scheduler_output: "SchedulerOutput",
+        shift_computed_tokens: int = 0,
     ) -> list[torch.Tensor]:
         mm_embeds: list[torch.Tensor] = []
         for req_id in self.input_batch.req_ids:
             num_scheduled_tokens = scheduler_output.num_scheduled_tokens[
                 req_id]
             req_state = self.requests[req_id]
-            num_computed_tokens = req_state.num_computed_tokens
+            num_computed_tokens = \
+                req_state.num_computed_tokens + shift_computed_tokens
             mm_positions = req_state.mm_positions
             for i, pos_info in enumerate(mm_positions):
                 start_pos = pos_info.offset
@@ -1858,6 +1860,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                         [h[token_indices] for h in aux_hidden_states], dim=-1)
                 else:
                     target_hidden_states = hidden_states[token_indices]
+            mm_embeds = None
+            if self.is_multimodal_model:
+                mm_embeds = self._gather_mm_embeddings(scheduler_output,
+                                                       shift_computed_tokens=1)
+
             draft_token_ids = self.drafter.propose(
                 target_token_ids=target_token_ids,
                 target_positions=target_positions,
@@ -1865,6 +1872,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 next_token_ids=next_token_ids,
                 sampling_metadata=sampling_metadata,
                 common_attn_metadata=common_attn_metadata,
+                mm_embeds=mm_embeds,
             )
             spec_token_ids = draft_token_ids.tolist()
         return spec_token_ids

From 71470bc4afdab89eccc232b668a69571ffede1dc Mon Sep 17 00:00:00 2001
From: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com>
Date: Thu, 31 Jul 2025 11:39:16 -0700
Subject: [PATCH 097/224] [Misc] Add unit tests for chunked local attention
 (#21692)

Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
---
 .../attention/test_chunked_local_attention.py | 196 ++++++++++++++++++
 tests/v1/attention/utils.py                   |  36 ++--
 2 files changed, 219 insertions(+), 13 deletions(-)
 create mode 100644 tests/v1/attention/test_chunked_local_attention.py

diff --git a/tests/v1/attention/test_chunked_local_attention.py b/tests/v1/attention/test_chunked_local_attention.py
new file mode 100644
index 0000000000000..8c5a63653db9f
--- /dev/null
+++ b/tests/v1/attention/test_chunked_local_attention.py
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
+
+import numpy as np
+import pytest
+import torch
+
+from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata
+from vllm.v1.attention.backends.utils import (
+    make_local_attention_virtual_batches)
+
+
+@dataclass
+class LocalAttentionTestData:
+    # Input parameters
+    batch_spec: BatchSpec
+    attn_chunk_size: int
+    block_size: int
+    # Expected return values
+    expected_q_seqlens: list[int]
+    expected_k_seqlens: list[int]
+    expected_local_block_table: list[list[int]]
+
+
+test_data_list = [
+    # Same as example in docstring of make_local_attention_virtual_batches
+    # except block table has 9 columns instead of 10
+    LocalAttentionTestData(
+        batch_spec=BatchSpec(
+            query_lens=[4, 10, 5],
+            seq_lens=[6, 17, 9],
+        ),
+        attn_chunk_size=4,
+        block_size=2,
+        expected_q_seqlens=[2, 2, 1, 4, 4, 1, 4, 1],
+        expected_k_seqlens=[4, 2, 4, 4, 4, 1, 4, 1],
+        # 2 pages per local batch
+        # (chunk size 4 // block size 2)
+        expected_local_block_table=[
+            [0, 1],  # local-batch 0, (batch 0, starting from k[0])
+            [2, 3],  # local-batch 1, (batch 0, starting from k[4])
+            [11, 12],  # local-batch 2, (batch 1, starting from k[4])
+            [13, 14],  # local-batch 3, (batch 1, starting from k[8])
+            [15, 16],  # local-batch 4, (batch 1, starting from k[12])
+            [17, 17],  # local-batch 5, (batch 1, starting from k[16])
+            [20, 21],  # local-batch 6, (batch 2, starting from k[4])
+            [22, 23],  # local-batch 7, (batch 2, starting from k[8])
+        ]),
+    # Case where block indices are not clipped to block table ncols-1
+    # because tokens_in_last_block == attn_chunk_size
+    LocalAttentionTestData(batch_spec=BatchSpec(
+        query_lens=[8],
+        seq_lens=[12],
+    ),
+                           attn_chunk_size=4,
+                           block_size=2,
+                           expected_q_seqlens=[4, 4],
+                           expected_k_seqlens=[4, 4],
+                           expected_local_block_table=[
+                               [2, 3],
+                               [4, 5],
+                           ]),
+    # Case where all kv_seq positions are involved in attn
+    LocalAttentionTestData(
+        batch_spec=BatchSpec(
+            query_lens=[7],
+            # 10 - 7 = 3 previously computed tokens
+            seq_lens=[10],
+        ),
+        attn_chunk_size=4,
+        block_size=2,
+        expected_q_seqlens=[1, 4, 2],
+        expected_k_seqlens=[4, 4, 2],
+        expected_local_block_table=[
+            [0, 1],
+            [2, 3],
+            [4, 4],
+        ]),
+    # Case where attn_chunk_size > kv_seq_len
+    # so no extra mini virtual batches are created
+    LocalAttentionTestData(
+        batch_spec=BatchSpec(
+            query_lens=[4],
+            seq_lens=[6],
+        ),
+        # Larger than kv_seq_len
+        attn_chunk_size=10,
+        block_size=2,
+        # No change to q_seqlens and k_seqlens
+        expected_q_seqlens=[4],
+        expected_k_seqlens=[6],
+        # In this case, we only need a block-table like:
+        #  block_table = [ [0, 1, 2] ] # 1 batch, 3 pages
+        # But we need to pad it to 5 pages per local batch
+        # because currently the pages_per_local_batch
+        # is calculated as (attn_chunk_size // block_size)
+        expected_local_block_table=[
+            [0, 1, 2, 2, 2],
+        ]),
+    # Block size equal to chunk size
+    # Expect single page per batch in local batch table
+    LocalAttentionTestData(
+        batch_spec=BatchSpec(
+            query_lens=[6, 6],
+            seq_lens=[8, 8],
+        ),
+        attn_chunk_size=4,
+        block_size=4,
+        expected_q_seqlens=[2, 4, 2, 4],
+        expected_k_seqlens=[4, 4, 4, 4],
+        # Initial block table = [
+        #    [0, 1], < batch 0
+        #    [2, 3], < batch 1
+        # ]
+        expected_local_block_table=[
+            [0],  # local-batch 0, (batch 0, starting from k[0])
+            [1],  # local-batch 1, (batch 0, starting from k[4])
+            [2],  # local-batch 2, (batch 1, starting from k[0])
+            [3],  # local-batch 3, (batch 1, starting from k[4])
+        ]),
+    # Case where query falls in the second attention chunk
+    #  k_toks >   0 1 2 3 4
+    #  q_toks v  _____________
+    #         0 | 1
+    #         1 | 1 1
+    #         2 | 1 1 1
+    #         3 | 1 1 1 1
+    #         4 |         1
+    #  where tokens 0,1,2,3 have been pre-computed
+    LocalAttentionTestData(batch_spec=BatchSpec(
+        query_lens=[1],
+        seq_lens=[5],
+    ),
+                           attn_chunk_size=4,
+                           block_size=2,
+                           expected_q_seqlens=[1],
+                           expected_k_seqlens=[1],
+                           expected_local_block_table=[
+                               [2, 2],
+                           ]),
+]
+
+
+@pytest.mark.parametrize("test_data", test_data_list)
+def test_local_attention_virtual_batches(test_data: LocalAttentionTestData):
+    device = torch.device("cuda:0")
+    batch_spec = test_data.batch_spec
+    attn_chunk_size = test_data.attn_chunk_size
+    block_size = test_data.block_size
+    expected_q_seqlens = test_data.expected_q_seqlens
+    expected_k_seqlens = test_data.expected_k_seqlens
+    expected_local_block_table = test_data.expected_local_block_table
+
+    # Create common attention metadata
+    common_attn_metadata = create_common_attn_metadata(
+        batch_spec,
+        block_size,
+        device,
+        # Use torch.arange instead of torch.randint so we can assert on
+        # block table tensor values. The block table will have shape
+        # (num_batches, cdiv(max_seq_len, block_size)) and the values will be
+        # aranged from 0 to num_batches * cdiv(max_seq_len, block_size) - 1
+        arange_block_indices=True,
+    )
+
+    # Call the function
+    result = make_local_attention_virtual_batches(attn_chunk_size,
+                                                  common_attn_metadata,
+                                                  block_size)
+
+    # Convert to numpy for easier comparison
+    actual_q_seqlens = np.diff(result.query_start_loc_cpu.numpy())
+    actual_k_seqlens = result.seq_lens_cpu.numpy()
+
+    # Check that all query lengths are less than or equal to attn_chunk_size
+    assert all(q_len <= attn_chunk_size for q_len in actual_q_seqlens)
+    # Check that all key lengths are less than or equal to attn_chunk_size
+    assert all(k_len <= attn_chunk_size for k_len in actual_k_seqlens)
+    # Check that the total number of query tokens is preserved
+    assert sum(actual_q_seqlens) == sum(batch_spec.query_lens)
+
+    # Verify results
+    np.testing.assert_array_equal(actual_q_seqlens, expected_q_seqlens)
+    np.testing.assert_array_equal(actual_k_seqlens, expected_k_seqlens)
+
+    expected_block_table_tensor =\
+        torch.tensor(expected_local_block_table,
+        dtype=torch.int32,
+        device=device)
+
+    print(f"Expected block table:\n{expected_block_table_tensor}")
+    print(f"Actual block table:\n{result.block_table_tensor}")
+
+    torch.testing.assert_close(result.block_table_tensor,
+                               expected_block_table_tensor)
diff --git a/tests/v1/attention/utils.py b/tests/v1/attention/utils.py
index ae2ab6e6413c0..be6cfce6fba8a 100644
--- a/tests/v1/attention/utils.py
+++ b/tests/v1/attention/utils.py
@@ -40,7 +40,8 @@ def create_common_attn_metadata(
         batch_spec: BatchSpec,
         block_size: int,
         device: torch.device,
-        max_block_idx: int = 1000) -> CommonAttentionMetadata:
+        max_block_idx: int = 1000,
+        arange_block_indices: bool = False) -> CommonAttentionMetadata:
     """Create CommonAttentionMetadata from a BatchSpec and ModelParams."""
     # Create query start locations
     query_start_loc = torch.zeros(batch_spec.batch_size + 1,
@@ -65,19 +66,28 @@ def create_common_attn_metadata(
     ]
     num_computed_tokens_cpu = torch.tensor(context_lens, dtype=torch.int32)
 
-    # Create block table (random for testing)
+    # Create block table and slot mapping
     max_blocks = (max(batch_spec.seq_lens) + block_size - 1) // block_size
-    block_table_tensor = torch.randint(0,
-                                       max_block_idx,
-                                       (batch_spec.batch_size, max_blocks),
-                                       dtype=torch.int32,
-                                       device=device)
-
-    # Create slot mapping
-    slot_mapping = torch.randint(0,
-                                 max_block_idx, (num_tokens, ),
-                                 dtype=torch.int64,
-                                 device=device)
+    if arange_block_indices:
+        num_blocks = batch_spec.batch_size * max_blocks
+        block_table_tensor = torch.arange(num_blocks,
+                                          dtype=torch.int32,
+                                          device=device).view(
+                                              batch_spec.batch_size,
+                                              max_blocks)
+        slot_mapping = torch.arange(num_tokens,
+                                    dtype=torch.int64,
+                                    device=device).view(num_tokens)
+    else:
+        block_table_tensor = torch.randint(0,
+                                           max_block_idx,
+                                           (batch_spec.batch_size, max_blocks),
+                                           dtype=torch.int32,
+                                           device=device)
+        slot_mapping = torch.randint(0,
+                                     max_block_idx, (num_tokens, ),
+                                     dtype=torch.int64,
+                                     device=device)
 
     # Calculate max query length
     max_query_len = max(batch_spec.query_lens)

From 2dff2e21d928129e985b23897e9f326abe3f1417 Mon Sep 17 00:00:00 2001
From: Benjamin Chislett <benjamin.chislett@centml.ai>
Date: Thu, 31 Jul 2025 16:33:53 -0400
Subject: [PATCH 098/224] [Bugfix] Fix MTP weight loading  (#21941)

---
 vllm/model_executor/models/deepseek_mtp.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py
index 911f0036c2dd6..2e026d582a6de 100644
--- a/vllm/model_executor/models/deepseek_mtp.py
+++ b/vllm/model_executor/models/deepseek_mtp.py
@@ -182,6 +182,8 @@ class DeepSeekMTP(nn.Module, SupportsPP):
         stacked_params_mapping = [
             ("gate_up_proj", "gate_proj", 0),
             ("gate_up_proj", "up_proj", 1),
+            ("fused_qkv_a_proj", "q_a_proj", 0),
+            ("fused_qkv_a_proj", "kv_a_proj_with_mqa", 1),
         ]
 
         expert_params_mapping = FusedMoE.make_expert_params_mapping(
@@ -212,6 +214,13 @@ class DeepSeekMTP(nn.Module, SupportsPP):
                 if (("mlp.experts." in name) and name not in params_dict):
                     continue
                 name = name.replace(weight_name, param_name)
+
+                # QKV fusion is optional, fall back to normal
+                # weight loading if it's not enabled
+                if ((param_name == "fused_qkv_a_proj")
+                        and name not in params_dict):
+                    continue
+
                 # Skip loading extra bias for GPTQ models.
                 if name.endswith(".bias") and name not in params_dict:
                     continue

From 6e672daf62e7b03ff1dcf74e4206dad07d39d4ec Mon Sep 17 00:00:00 2001
From: Ilya Markov <markovilya197@gmail.com>
Date: Thu, 31 Jul 2025 22:58:38 +0200
Subject: [PATCH 099/224] Add FlashInfer allreduce RMSNorm Quant fusion
 (#21069)

Signed-off-by: ilmarkov <imarkov@redhat.com>
Signed-off-by: ilmarkov <markovilya197@gmail.com>
Co-authored-by: ilmarkov <imarkov@redhat.com>
---
 .buildkite/test-pipeline.yaml           |   1 +
 tests/compile/test_fusion_all_reduce.py | 126 +++++-
 tests/utils.py                          |  12 +
 vllm/compilation/collective_fusion.py   | 533 ++++++++++++++++++++++--
 vllm/config.py                          |   2 +-
 5 files changed, 606 insertions(+), 68 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index a7fe200559305..2f6cc45be77e6 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -353,6 +353,7 @@ steps:
     - pytest -v -s compile/test_silu_mul_quant_fusion.py
     - pytest -v -s compile/test_sequence_parallelism.py
     - pytest -v -s compile/test_async_tp.py
+    - pytest -v -s compile/test_fusion_all_reduce.py
 
 - label: PyTorch Fullgraph Smoke Test # 9min
   mirror_hardwares: [amdexperimental]
diff --git a/tests/compile/test_fusion_all_reduce.py b/tests/compile/test_fusion_all_reduce.py
index b8d64247f6beb..b394e0035c689 100644
--- a/tests/compile/test_fusion_all_reduce.py
+++ b/tests/compile/test_fusion_all_reduce.py
@@ -7,22 +7,26 @@ import torch
 
 import vllm.envs as envs
 from vllm.compilation.collective_fusion import AllReduceFusionPass
+from vllm.compilation.fix_functionalization import FixFunctionalizationPass
+from vllm.compilation.noop_elimination import NoOpEliminationPass
 from vllm.config import (CompilationConfig, CompilationLevel, DeviceConfig,
                          ModelConfig, PassConfig, VllmConfig)
 from vllm.distributed import tensor_model_parallel_all_reduce
 from vllm.distributed.parallel_state import (init_distributed_environment,
                                              initialize_model_parallel)
 from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
+    GroupShape, QuantFP8)
 from vllm.platforms import current_platform
 from vllm.utils import update_environment_variables
 
-from ..utils import multi_gpu_test
+from ..utils import has_module_attribute, multi_gpu_test
 from .backend import TestBackend
 
 
 class TestAllReduceRMSNormModel(torch.nn.Module):
 
-    def __init__(self, hidden_size=16, eps=1e-6):
+    def __init__(self, hidden_size=16, token_num=16, eps=1e-6):
         super().__init__()
         self.hidden_size = hidden_size
         self.eps = eps
@@ -43,7 +47,7 @@ class TestAllReduceRMSNormModel(torch.nn.Module):
 
 class TestAllReduceFusedAddRMSNormModel(torch.nn.Module):
 
-    def __init__(self, hidden_size=16, eps=1e-6):
+    def __init__(self, hidden_size=16, token_num=16, eps=1e-6):
         super().__init__()
         self.hidden_size = hidden_size
         self.eps = eps
@@ -62,24 +66,101 @@ class TestAllReduceFusedAddRMSNormModel(torch.nn.Module):
         return [torch.ops.vllm.flashinfer_trtllm_fused_allreduce_norm.default]
 
 
+class TestAllReduceFusedAddRMSNormStaticQuantFP8Model(torch.nn.Module):
+
+    def __init__(self, hidden_size=16, token_num=16, eps=1e-6):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.eps = eps
+        self.norm = RMSNorm(hidden_size, eps)
+        self.quant_fp8 = QuantFP8(static=True,
+                                  group_shape=GroupShape.PER_TENSOR)
+        self.scale = torch.rand(1, dtype=torch.float32)
+        self.output = torch.empty((token_num, hidden_size),
+                                  dtype=torch.float32)
+
+    def forward(self, hidden_states, residual):
+        view = hidden_states.reshape(-1, self.hidden_size)
+        all_reduce = tensor_model_parallel_all_reduce(view)
+        norm_output, residual_output = self.norm(all_reduce, residual)
+        torch.ops._C.static_scaled_fp8_quant(self.output,
+                                             norm_output.contiguous(),
+                                             self.scale)
+        return self.output, residual_output
+
+    def ops_in_model_after(self):
+        return [torch.ops.vllm.flashinfer_trtllm_fused_allreduce_norm.default]
+
+    def ops_in_model_before(self):
+        return [
+            torch.ops.vllm.all_reduce.default,
+            torch.ops._C.static_scaled_fp8_quant.default
+        ]
+
+
+class TestAllReduceFusedAddRMSNormStaticQuantFP4Model(torch.nn.Module):
+
+    def __init__(self, hidden_size=16, token_num=16, eps=1e-6):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.eps = eps
+        self.norm = RMSNorm(hidden_size, eps)
+        self.scale = torch.rand(1, dtype=torch.float32)
+        self.output = torch.empty((token_num, hidden_size),
+                                  dtype=torch.float32)
+
+        round_up = lambda x, y: (x + y - 1) // y * y
+        rounded_m = round_up(token_num, 128)
+        scale_n = hidden_size // 16
+        rounded_n = round_up(scale_n, 4)
+        self.output_scale = torch.empty((rounded_m, rounded_n // 4),
+                                        dtype=torch.int32)
+
+    def forward(self, hidden_states, residual):
+        view = hidden_states.reshape(-1, self.hidden_size)
+        all_reduce = tensor_model_parallel_all_reduce(view)
+        norm_output, residual_output = self.norm(all_reduce, residual)
+        norm_output = norm_output.reshape(-1, norm_output.shape[-1])
+        torch.ops._C.scaled_fp4_quant(self.output, norm_output,
+                                      self.output_scale, self.scale)
+        return self.output, residual_output, self.output_scale
+
+    def ops_in_model_after(self):
+        return [torch.ops.vllm.flashinfer_trtllm_fused_allreduce_norm.default]
+
+    def ops_in_model_before(self):
+        return [
+            torch.ops.vllm.all_reduce.default,
+            torch.ops._C.scaled_fp4_quant.default
+        ]
+
+
 @multi_gpu_test(num_gpus=2)
-@pytest.mark.parametrize(
-    "test_model",
-    [TestAllReduceRMSNormModel, TestAllReduceFusedAddRMSNormModel])
+@pytest.mark.parametrize("test_model", [
+    TestAllReduceRMSNormModel,
+    TestAllReduceFusedAddRMSNormModel,
+    TestAllReduceFusedAddRMSNormStaticQuantFP8Model,
+    TestAllReduceFusedAddRMSNormStaticQuantFP4Model,
+])
 @pytest.mark.parametrize("batch_size", [8])
 @pytest.mark.parametrize("seq_len", [8])
-@pytest.mark.parametrize("hidden_size", [4096])
+@pytest.mark.parametrize("hidden_size", [16])
 @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
 @pytest.mark.skipif(envs.VLLM_TARGET_DEVICE not in ["cuda"],
                     reason="Only test on CUDA")
-@pytest.mark.skipif(not find_spec("flashinfer"),
-                    reason="flashinfer is not installed")
-@pytest.mark.skipif(not current_platform.is_device_capability(100),
-                    reason="Only test on SM100")
+@pytest.mark.skipif(
+    not find_spec("flashinfer")
+    or not has_module_attribute("flashinfer.comm", "trtllm_allreduce_fusion"),
+    reason="flashinfer is not found or flashinfer "
+    "is not compiled with trtllm_allreduce_fusion")
 def test_all_reduce_fusion_pass_replace(test_model: torch.nn.Module,
                                         batch_size: int, seq_len: int,
                                         hidden_size: int, dtype: torch.dtype):
     num_processes = 2
+    if (test_model == TestAllReduceFusedAddRMSNormStaticQuantFP4Model
+            and not current_platform.has_device_capability(100)):
+        pytest.skip("Skip as nvfp4 is only supported on "
+                    "devices with compute capability 10.0 (Blackwell)")
 
     def run_torch_spawn(fn, nprocs):
         torch.multiprocessing.spawn(fn,
@@ -113,12 +194,11 @@ def all_reduce_fusion_pass_on_test_model(local_rank: int, world_size: int,
     init_distributed_environment()
     initialize_model_parallel(tensor_model_parallel_size=world_size)
 
-    vllm_config = VllmConfig(
-        compilation_config=CompilationConfig(level=CompilationLevel.PIECEWISE,
-                                             custom_ops=["+rms_norm"],
-                                             compile_sizes=[2, 4, 8]))
+    vllm_config = VllmConfig(compilation_config=CompilationConfig(
+        level=CompilationLevel.PIECEWISE,
+        custom_ops=["+rms_norm", "+quant_fp8"]))
     vllm_config.compilation_config.pass_config = PassConfig(
-        enable_fi_allreduce_fusion=True)
+        enable_fi_allreduce_fusion=True, enable_noop=True)
     vllm_config.device_config = DeviceConfig(device=torch.device("cuda"))
 
     # this is a fake model name to construct the model config
@@ -130,14 +210,16 @@ def all_reduce_fusion_pass_on_test_model(local_rank: int, world_size: int,
                                            seed=42)
 
     all_reduce_fusion_pass = AllReduceFusionPass(vllm_config)
-    backend = TestBackend(all_reduce_fusion_pass)
+    noop_pass = NoOpEliminationPass(vllm_config)
+    func_pass = FixFunctionalizationPass(vllm_config)
 
-    model = test_model_cls(hidden_size)
+    backend = TestBackend(all_reduce_fusion_pass, noop_pass, func_pass)
 
-    hidden_states = torch.randn((batch_size * seq_len, hidden_size),
-                                requires_grad=False)
-    residual = torch.randn((batch_size * seq_len, hidden_size),
-                           requires_grad=False)
+    token_num = batch_size * seq_len
+    model = test_model_cls(hidden_size, token_num)
+
+    hidden_states = torch.randn((token_num, hidden_size), requires_grad=False)
+    residual = torch.randn((token_num, hidden_size), requires_grad=False)
 
     compiled_model = torch.compile(model, backend=backend)
     compiled_model(hidden_states, residual)
diff --git a/tests/utils.py b/tests/utils.py
index f4317e6bdb406..1c1a1cc6014ec 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -4,6 +4,7 @@
 import asyncio
 import copy
 import functools
+import importlib
 import os
 import signal
 import subprocess
@@ -974,3 +975,14 @@ def get_client_text_logprob_generations(
     return [(text_generations, text,
              (None if x.logprobs is None else x.logprobs.top_logprobs))
             for completion in completions for x in completion.choices]
+
+
+def has_module_attribute(module_name, attribute_name):
+    """
+    Helper function to check if a module has a specific attribute.
+    """
+    try:
+        module = importlib.import_module(module_name)
+        return hasattr(module, attribute_name)
+    except ImportError:
+        return False
diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py
index cb99fe8310e73..6ae50245ed3a8 100644
--- a/vllm/compilation/collective_fusion.py
+++ b/vllm/compilation/collective_fusion.py
@@ -37,6 +37,8 @@ logger = init_logger(__name__)
 ALLREDUCE_OP = torch.ops.vllm.all_reduce.default
 RMS_OP = torch.ops._C.rms_norm.default
 RMS_ADD_OP = torch.ops._C.fused_add_rms_norm.default
+STATIC_FP8_QUANT_OP = torch.ops._C.static_scaled_fp8_quant.default
+STATIC_FP4_QUANT_OP = torch.ops._C.scaled_fp4_quant.default
 
 
 class BasePattern:
@@ -394,7 +396,7 @@ if flashinfer_comm is not None:
     # Max size of the input tensor per world size
     # to use flashinfer fused allreduce
     _FI_MAX_SIZES = {
-        2: MiB,  # 1MB
+        2: 64 * MiB,  # 64MB
         4: MiB,  # 1MB
         6: MiB // 2,  # 512KB
         8: MiB // 2,  # 512KB
@@ -414,9 +416,13 @@ if flashinfer_comm is not None:
         trigger_completion_at_end: bool,
         fp32_acc: bool,
         max_token_num: int,
+        pattern_code: int,
+        fuse_rms_quant: bool,
         norm_out: Optional[torch.Tensor] = None,
+        quant_out: Optional[torch.Tensor] = None,
+        scale_out: Optional[torch.Tensor] = None,
+        scale_factor: Optional[torch.Tensor] = None,
     ) -> None:
-
         num_tokens, hidden_size = allreduce_in.shape
         element_size = allreduce_in.element_size()
         current_tensor_size = num_tokens * hidden_size * element_size
@@ -425,7 +431,6 @@ if flashinfer_comm is not None:
             _FI_MAX_SIZES.get(world_size, _DEFAULT_FI_MAX_SIZE),
             max_fusion_size,
         )
-
         if use_flashinfer:
             assert (_FI_WORKSPACE_TENSOR is not None
                     ), "Flashinfer must be enabled when using flashinfer"
@@ -455,37 +460,65 @@ if flashinfer_comm is not None:
                 use_oneshot=True,
                 trigger_completion_at_end=trigger_completion_at_end,
                 fp32_acc=fp32_acc,
-                pattern_code=flashinfer_comm.AllReduceFusionPattern.
-                kARResidualRMSNorm,
+                pattern_code=pattern_code,
                 allreduce_out=None,
-                quant_out=None,
-                scale_out=None,
-                layout_code=None,
-                scale_factor=None,
+                quant_out=quant_out,
+                scale_out=scale_out,
+                # in vllm we only support swizzled layout
+                layout_code=flashinfer_comm.FP4QuantizationSFLayout.SWIZZLED,
+                scale_factor=scale_factor,
             )
         else:
             allreduce_out = tensor_model_parallel_all_reduce(allreduce_in)
-            if norm_out is None:
-                torch.ops._C.fused_add_rms_norm(allreduce_out, residual,
-                                                rms_gamma, rms_eps)
+            if (scale_factor is not None and scale_out is None
+                    and fuse_rms_quant):
+                # Use the fused RMS norm + static FP8 quant op
+                if norm_out is None:
+                    torch.ops._C.fused_add_rms_norm_static_fp8_quant(
+                        quant_out, allreduce_out, residual, rms_gamma,
+                        scale_factor, rms_eps)
+                else:
+                    torch.ops._C.rms_norm_static_fp8_quant(
+                        quant_out, allreduce_out, rms_gamma, scale_factor,
+                        rms_eps)
             else:
-                torch.ops._C.rms_norm(norm_out, allreduce_out, rms_gamma,
-                                      rms_eps)
-            allreduce_in.copy_(allreduce_out)
+                if norm_out is None:
+                    torch.ops._C.fused_add_rms_norm(allreduce_out, residual,
+                                                    rms_gamma, rms_eps)
+                    norm_out = allreduce_out
+                else:
+                    torch.ops._C.rms_norm(norm_out, allreduce_out, rms_gamma,
+                                          rms_eps)
+                if scale_factor is not None:
+                    if scale_out is not None:
+                        torch.ops._C.scaled_fp4_quant(quant_out, norm_out,
+                                                      scale_out, scale_factor)
+                    else:
+                        torch.ops._C.static_scaled_fp8_quant(
+                            quant_out, norm_out, scale_factor)
+            if scale_factor is None or norm_out is not None:
+                # we need to return allreduce output
+                # in cases of non quant fused AR + RMS norm
+                # and fused AR + RMS norm + quant without fused add
+                allreduce_in.copy_(allreduce_out)
 
     def call_trtllm_fused_allreduce_norm_fake(
-        allreduce_in: torch.Tensor,
-        residual: torch.Tensor,
-        rms_gamma: torch.Tensor,
-        rms_eps: float,
-        world_rank: int,
-        world_size: int,
-        launch_with_pdl: bool,
-        trigger_completion_at_end: bool,
-        fp32_acc: bool,
-        max_token_num: int,
-        norm_out: Optional[torch.Tensor] = None,
-    ) -> None:
+            allreduce_in: torch.Tensor,
+            residual: torch.Tensor,
+            rms_gamma: torch.Tensor,
+            rms_eps: float,
+            world_rank: int,
+            world_size: int,
+            launch_with_pdl: bool,
+            trigger_completion_at_end: bool,
+            fp32_acc: bool,
+            max_token_num: int,
+            pattern_code: int,
+            fuse_rms_quant: bool,
+            norm_out: Optional[torch.Tensor] = None,
+            quant_out: Optional[torch.Tensor] = None,
+            scale_out: Optional[torch.Tensor] = None,
+            scale_factor: Optional[torch.Tensor] = None) -> None:
         pass
 
     direct_register_custom_op(
@@ -495,6 +528,8 @@ if flashinfer_comm is not None:
             "allreduce_in",
             "residual",
             "norm_out",
+            "quant_out",
+            "scale_out",
         ],
         fake_impl=call_trtllm_fused_allreduce_norm_fake,
         dispatch_key=current_platform.dispatch_key,
@@ -512,6 +547,7 @@ class FlashInferFusedAllReduceParams:
         world_size: int,
         use_fp32_lamport: bool = False,
         max_token_num: int = 1024,
+        fuse_rms_quant: bool = False,
     ):
         self.rank = rank
         self.world_size = world_size
@@ -521,6 +557,7 @@ class FlashInferFusedAllReduceParams:
         self.fp32_acc = True
         self.use_oneshot = False
         self.max_token_num = max_token_num
+        self.fuse_rms_quant = fuse_rms_quant
 
     def get_trtllm_fused_allreduce_kwargs(self):
         return {
@@ -530,10 +567,16 @@ class FlashInferFusedAllReduceParams:
             "trigger_completion_at_end": self.trigger_completion_at_end,
             "fp32_acc": self.fp32_acc,
             "max_token_num": self.max_token_num,
+            "fuse_rms_quant": self.fuse_rms_quant,
         }
 
 
-class AllReduceRMSNORMPattern(BasePattern):
+class AllReduceRMSNormPattern(BasePattern):
+    """
+    This pattern replaces the allreduce + rms norm (without residual) 
+    with fused flashinfer implementation.
+    Applies to allreduce + rmsnorm before attn in the first Transformer block.
+    """
 
     def __init__(
         self,
@@ -559,29 +602,34 @@ class AllReduceRMSNORMPattern(BasePattern):
 
         def pattern(input: torch.Tensor, rms_result: torch.Tensor,
                     weight: torch.Tensor):
-            all_reduce_output = tensor_model_parallel_all_reduce(input)
+            allreduce_output = tensor_model_parallel_all_reduce(input)
             rms = auto_functionalized(
                 RMS_OP,
                 result=rms_result,
-                input=all_reduce_output,
+                input=allreduce_output,
                 weight=weight,
                 epsilon=self.epsilon,
             )
-            return rms[1], all_reduce_output
+            # rms_result, allreduce_output
+            return rms[1], allreduce_output
 
         def replacement(input: torch.Tensor, rms_result: torch.Tensor,
                         weight: torch.Tensor):
             residual = torch.zeros_like(input)
             allreduce = auto_functionalized(
-                torch.ops.vllm.flashinfer_trtllm_fused_allreduce_norm.default,
+                flashinfer_trtllm_fused_allreduce_norm,
                 allreduce_in=input,
                 residual=residual,
                 norm_out=rms_result,
+                quant_out=None,
+                scale_out=None,
                 rms_gamma=weight,
                 rms_eps=self.epsilon,
+                pattern_code=flashinfer_comm.AllReduceFusionPattern.
+                kARResidualRMSNorm,
                 **self.allreduce_params.get_trtllm_fused_allreduce_kwargs(),
             )
-
+            # rms_result, allreduce_in
             return allreduce[3], allreduce[1]
 
         pm.register_replacement(pattern, replacement, self.get_inputs(),
@@ -589,6 +637,11 @@ class AllReduceRMSNORMPattern(BasePattern):
 
 
 class AllReduceFusedAddRMSNormPattern(BasePattern):
+    """
+    This pattern replaces the allreduce + rms norm (with residual) 
+    with fused flashinfer implementation.
+    Applies to o_proj + rmsnorm after attn and mlp + rmsnorm before attn.
+    """
 
     def __init__(
         self,
@@ -615,33 +668,390 @@ class AllReduceFusedAddRMSNormPattern(BasePattern):
 
         def pattern(residual: torch.Tensor, input: torch.Tensor,
                     weight: torch.Tensor):
-            all_reduce_output = tensor_model_parallel_all_reduce(input)
+            allreduce_output = tensor_model_parallel_all_reduce(input)
             rms = auto_functionalized(
                 RMS_ADD_OP,
-                input=all_reduce_output,
+                input=allreduce_output,
                 residual=residual,
                 weight=weight,
                 epsilon=self.epsilon,
             )
+            # input, residual
             return rms[1], rms[2]
 
         def replacement(residual: torch.Tensor, input: torch.Tensor,
                         weight: torch.Tensor):
             allreduce = auto_functionalized(
-                torch.ops.vllm.flashinfer_trtllm_fused_allreduce_norm.default,
+                flashinfer_trtllm_fused_allreduce_norm,
                 allreduce_in=input,
                 residual=residual,
+                norm_out=None,
+                quant_out=None,
+                scale_out=None,
                 rms_gamma=weight,
                 rms_eps=self.epsilon,
-                norm_out=None,
+                pattern_code=flashinfer_comm.AllReduceFusionPattern.
+                kARResidualRMSNorm,
                 **self.allreduce_params.get_trtllm_fused_allreduce_kwargs(),
             )
+            # allreduce_in, residual
             return allreduce[1], allreduce[2]
 
         pm.register_replacement(pattern, replacement, self.get_inputs(),
                                 pm.fwd_only, pm_pass)
 
 
+class AllReduceFusedRMSNormStaticQuantFP8Pattern(BasePattern):
+    """
+    This pattern replaces the allreduce + rms norm (without residual) 
+    + static fp8 quant with fused flashinfer implementation.
+    Applies to allreduce + rmsnorm + quant before attn 
+    in the first Transformer block.
+    """
+
+    def __init__(self, epsilon: float, dtype: torch.dtype, device: str,
+                 allreduce_params: FlashInferFusedAllReduceParams):
+        super().__init__(dtype, device)
+        self.epsilon = epsilon
+        self.allreduce_params = allreduce_params
+        self.quant_dtype = torch.float8_e4m3fn
+
+    def register(self, pm_pass: PatternMatcherPass):
+
+        def get_inputs():
+            input = torch.zeros([1, 8, 4],
+                                device=self.device,
+                                dtype=self.dtype)
+            rmsnorm_result = torch.empty([1, 8, 4],
+                                         device=self.device,
+                                         dtype=self.dtype)
+            quant_result = torch.empty([1, 8, 4],
+                                       device=self.device,
+                                       dtype=self.quant_dtype)
+            weight = torch.empty([4], device=self.device, dtype=self.dtype)
+            scale = torch.tensor(1.0, device=self.device, dtype=torch.float32)
+            return [input, rmsnorm_result, quant_result, weight, scale]
+
+        def pattern(
+            input: torch.Tensor,
+            rmsnorm_result: torch.Tensor,
+            quant_result: torch.Tensor,
+            weight: torch.Tensor,
+            scale: torch.Tensor,
+        ):
+            all_reduce = tensor_model_parallel_all_reduce(input)
+            rmsnorm_out_tuple = auto_functionalized(RMS_OP,
+                                                    result=rmsnorm_result,
+                                                    input=all_reduce,
+                                                    weight=weight,
+                                                    epsilon=self.epsilon)
+
+            quant_out_tuple = auto_functionalized(STATIC_FP8_QUANT_OP,
+                                                  result=quant_result,
+                                                  input=rmsnorm_out_tuple[1],
+                                                  scale=scale)
+
+            # quant_out, allreduce_output
+            return quant_out_tuple[1], all_reduce
+
+        def replacement(input: torch.Tensor, result_rms: torch.Tensor,
+                        quant_result: torch.Tensor, weight: torch.Tensor,
+                        scale: torch.Tensor):
+            residual = torch.zeros_like(input)
+            allreduce = auto_functionalized(
+                flashinfer_trtllm_fused_allreduce_norm,
+                allreduce_in=input,
+                residual=residual,
+                norm_out=result_rms,
+                quant_out=quant_result,
+                scale_out=None,
+                rms_gamma=weight,
+                rms_eps=self.epsilon,
+                pattern_code=flashinfer_comm.AllReduceFusionPattern.
+                kARResidualRMSNormFP8Quant,  # we don't use norm_out afterwards
+                scale_factor=scale,
+                **self.allreduce_params.get_trtllm_fused_allreduce_kwargs(),
+            )
+
+            # quant_out, allreduce_output
+            return allreduce[4], allreduce[1]
+
+        pm.register_replacement(pattern, replacement, get_inputs(),
+                                pm.fwd_only, pm_pass)
+
+
+class AllReduceFusedAddRMSNormStaticQuantFP8Pattern(BasePattern):
+    """
+    This pattern replaces the allreduce + rms norm (with residual)
+    + static fp8 quant with fused flashinfer implementation.
+    Applies to o_proj + rmsnorm after attn + quant and 
+    mlp + rmsnorm + quant before attn.
+    """
+
+    def __init__(self, epsilon: float, dtype: torch.dtype, device: str,
+                 allreduce_params: FlashInferFusedAllReduceParams):
+        super().__init__(dtype, device)
+        self.epsilon = epsilon
+        self.allreduce_params = allreduce_params
+        self.quant_dtype = torch.float8_e4m3fn
+
+    def register(self, pm_pass: PatternMatcherPass):
+
+        def get_inputs():
+            input = torch.empty([4, 4], device=self.device, dtype=self.dtype)
+
+            residual = torch.empty([4, 4],
+                                   device=self.device,
+                                   dtype=self.dtype)
+            weight = torch.empty([4, 4], device=self.device, dtype=self.dtype)
+            quant_result = torch.empty([4, 4],
+                                       device=self.device,
+                                       dtype=self.quant_dtype)
+            scale = torch.empty([1, 1],
+                                device=self.device,
+                                dtype=torch.float32)
+
+            return [
+                quant_result,
+                residual,
+                input,
+                weight,
+                scale,
+            ]
+
+        def pattern(
+            quant_result: torch.Tensor,
+            residual: torch.Tensor,
+            input: torch.Tensor,
+            weight: torch.Tensor,
+            scale: torch.Tensor,
+        ):
+            allreduce_output = tensor_model_parallel_all_reduce(input)
+
+            fused_add_rmsnorm_out_tuple = \
+            auto_functionalized(
+                RMS_ADD_OP,
+                input=allreduce_output,
+                residual=residual,
+                weight=weight,
+                epsilon=self.epsilon)
+            quant_out_tuple = auto_functionalized(
+                STATIC_FP8_QUANT_OP,
+                result=quant_result,
+                input=fused_add_rmsnorm_out_tuple[1],
+                scale=scale)
+
+            # quant_out, rms_norm_residual
+            return quant_out_tuple[1], fused_add_rmsnorm_out_tuple[2]
+
+        def replacement(quant_result: torch.Tensor, residual: torch.Tensor,
+                        input: torch.Tensor, weight: torch.Tensor,
+                        scale: torch.Tensor):
+            allreduce = auto_functionalized(
+                flashinfer_trtllm_fused_allreduce_norm,
+                allreduce_in=input,
+                residual=residual,
+                norm_out=None,
+                quant_out=quant_result,
+                scale_out=None,
+                rms_gamma=weight,
+                rms_eps=self.epsilon,
+                pattern_code=flashinfer_comm.AllReduceFusionPattern.
+                kARResidualRMSNormFP8Quant,  # we don't use norm_out afterwards
+                scale_factor=scale,
+                **self.allreduce_params.get_trtllm_fused_allreduce_kwargs(),
+            )
+            # quant_out, rms_norm_residual
+            return allreduce[4], allreduce[2]
+
+        pm.register_replacement(pattern, replacement, get_inputs(),
+                                pm.fwd_only, pm_pass)
+
+
+class AllReduceFusedRMSNormStaticQuantNVFP4Pattern(BasePattern):
+    """
+    This pattern replaces the allreduce + rms norm (without residual)
+    + static nvfp4 quant with fused flashinfer implementation.
+    Applies to allreduce + rmsnorm + quant before attn
+    in the first Transformer block.
+    """
+
+    def __init__(self, epsilon: float, dtype: torch.dtype, device: str,
+                 allreduce_params: FlashInferFusedAllReduceParams):
+        super().__init__(dtype, device)
+        self.epsilon = epsilon
+        self.allreduce_params = allreduce_params
+
+    def register(self, pm_pass: PatternMatcherPass):
+
+        def get_inputs():
+            input = torch.empty([1, 16, 16],
+                                device=self.device,
+                                dtype=self.dtype)
+
+            rmsnorm_result = torch.empty([1, 16, 16],
+                                         device=self.device,
+                                         dtype=self.dtype)
+            quant_result = torch.empty((16, 8),
+                                       device=self.device,
+                                       dtype=torch.uint8)
+            input_global_scale = torch.empty([1, 1],
+                                             device=self.device,
+                                             dtype=torch.float32)
+            weight = torch.empty([16], device=self.device, dtype=self.dtype)
+            output_scale = torch.empty([128, 4],
+                                       device=self.device,
+                                       dtype=torch.int32)
+
+            return [
+                input, rmsnorm_result, quant_result, weight,
+                input_global_scale, output_scale
+            ]
+
+        def pattern(
+            input: torch.Tensor,
+            rmsnorm_result: torch.Tensor,
+            quant_result: torch.Tensor,
+            weight: torch.Tensor,
+            input_global_scale: torch.Tensor,
+            output_scale: torch.Tensor,
+        ):
+            all_reduce = tensor_model_parallel_all_reduce(input)
+            rmsnorm_out_tuple = auto_functionalized(RMS_OP,
+                                                    result=rmsnorm_result,
+                                                    input=all_reduce,
+                                                    weight=weight,
+                                                    epsilon=self.epsilon)
+
+            quant_out_tuple = auto_functionalized(
+                STATIC_FP4_QUANT_OP,
+                output=quant_result,
+                input=rmsnorm_out_tuple[1],
+                output_scale=output_scale,
+                input_scale=input_global_scale)
+
+            # quant_out, allreduce_output, output_scale
+            return quant_out_tuple[1], all_reduce, quant_out_tuple[2]
+
+        def replacement(input: torch.Tensor, result_rms: torch.Tensor,
+                        quant_result: torch.Tensor, weight: torch.Tensor,
+                        input_global_scale: torch.Tensor,
+                        output_scale: torch.Tensor):
+            residual = torch.zeros_like(input)
+            allreduce = auto_functionalized(
+                flashinfer_trtllm_fused_allreduce_norm,
+                allreduce_in=input,
+                residual=residual,
+                norm_out=result_rms,
+                quant_out=quant_result,
+                scale_out=output_scale,
+                rms_gamma=weight,
+                rms_eps=self.epsilon,
+                pattern_code=flashinfer_comm.AllReduceFusionPattern.
+                kARResidualRMSNormFP4Quant,  # we don't use norm_out afterwards
+                scale_factor=input_global_scale,
+                **self.allreduce_params.get_trtllm_fused_allreduce_kwargs(),
+            )
+
+            # quant_out, allreduce_output, output_scale
+            return allreduce[4], allreduce[1], allreduce[5]
+
+        pm.register_replacement(pattern, replacement, get_inputs(),
+                                pm.fwd_only, pm_pass)
+
+
+class AllReduceFusedAddRMSNormStaticQuantNVFP4Pattern(BasePattern):
+    """
+    This pattern replaces the allreduce + rms norm (with residual)
+    + static nvfp4 quant with fused flashinfer implementation.
+    Applies to o_proj + rmsnorm after attn + quant and
+    mlp + rmsnorm + quant before attn.
+    """
+
+    def __init__(self, epsilon: float, dtype: torch.dtype, device: str,
+                 allreduce_params: FlashInferFusedAllReduceParams):
+        super().__init__(dtype, device)
+        self.epsilon = epsilon
+        self.allreduce_params = allreduce_params
+
+    def register(self, pm_pass: PatternMatcherPass):
+
+        def get_inputs():
+            input = torch.empty([16, 16], device=self.device, dtype=self.dtype)
+
+            residual = torch.empty([16, 16],
+                                   device=self.device,
+                                   dtype=self.dtype)
+            weight = torch.empty([16, 16],
+                                 device=self.device,
+                                 dtype=self.dtype)
+            quant_result = torch.empty((16, 8),
+                                       device=self.device,
+                                       dtype=torch.uint8)
+            input_global_scale = torch.empty([1, 1],
+                                             device=self.device,
+                                             dtype=torch.float32)
+            output_scale = torch.empty([128, 4],
+                                       device=self.device,
+                                       dtype=torch.int32)
+
+            return [
+                quant_result,
+                residual,
+                input,
+                output_scale,
+                weight,
+                input_global_scale,
+            ]
+
+        def pattern(quant_result: torch.Tensor, residual: torch.Tensor,
+                    input: torch.Tensor, output_scale: torch.Tensor,
+                    weight: torch.Tensor, input_global_scale: torch.Tensor):
+            allreduce_output = tensor_model_parallel_all_reduce(input)
+
+            fused_add_rmsnorm_out_tuple = \
+            auto_functionalized(
+                RMS_ADD_OP,
+                input=allreduce_output,
+                residual=residual,
+                weight=weight,
+                epsilon=self.epsilon)
+            quant_out_tuple = auto_functionalized(
+                STATIC_FP4_QUANT_OP,
+                output=quant_result,
+                input=fused_add_rmsnorm_out_tuple[1],
+                output_scale=output_scale,
+                input_scale=input_global_scale)
+
+            # quant_out, rms_norm_residual, output_scale
+            return quant_out_tuple[1], fused_add_rmsnorm_out_tuple[
+                2], quant_out_tuple[2]
+
+        def replacement(quant_result: torch.Tensor, residual: torch.Tensor,
+                        input: torch.Tensor, output_scale: torch.Tensor,
+                        weight: torch.Tensor,
+                        input_global_scale: torch.Tensor):
+            allreduce = auto_functionalized(
+                flashinfer_trtllm_fused_allreduce_norm,
+                allreduce_in=input,
+                residual=residual,
+                norm_out=None,
+                quant_out=quant_result,
+                scale_out=output_scale,
+                rms_gamma=weight,
+                rms_eps=self.epsilon,
+                pattern_code=flashinfer_comm.AllReduceFusionPattern.
+                kARResidualRMSNormFP4Quant,  # we don't use norm_out afterwards
+                scale_factor=input_global_scale,
+                **self.allreduce_params.get_trtllm_fused_allreduce_kwargs(),
+            )
+            # quant_out, rms_norm_residual, output_scale
+            return allreduce[4], allreduce[2], allreduce[5]
+
+        pm.register_replacement(pattern, replacement, get_inputs(),
+                                pm.fwd_only, pm_pass)
+
+
 class AllReduceFusionPass(VllmInductorPass):
 
     def __init__(self, config: VllmConfig):
@@ -671,13 +1081,16 @@ class AllReduceFusionPass(VllmInductorPass):
                 self.tp_size,
             )
             return
-
+        max_num_token = min(
+            _FI_MAX_SIZES.get(self.tp_size, _DEFAULT_FI_MAX_SIZE) //
+            (self.hidden_dim * self.tp_size * (4 if use_fp32_lamport else 2)),
+            config.compilation_config.pass_config.
+            fi_allreduce_fusion_max_token_num)
         self.ipc_handles, workspace_tensor = (
             flashinfer_comm.trtllm_create_ipc_workspace_for_all_reduce_fusion(
                 tp_rank=rank,
                 tp_size=self.tp_size,
-                max_token_num=config.compilation_config.pass_config.
-                fi_allreduce_fusion_max_token_num,
+                max_token_num=max_num_token,
                 hidden_dim=self.hidden_dim,
                 group=self.group,
                 use_fp32_lamport=use_fp32_lamport,
@@ -689,12 +1102,38 @@ class AllReduceFusionPass(VllmInductorPass):
             rank=rank,
             world_size=self.tp_size,
             use_fp32_lamport=use_fp32_lamport,
-            max_token_num=config.compilation_config.pass_config.
-            fi_allreduce_fusion_max_token_num,
-        )
+            max_token_num=max_num_token,
+            # use the fused rms norm + static fp8 quant op
+            # in the fallback path, when we don't use flashinfer
+            fuse_rms_quant=config.compilation_config.pass_config.enable_fusion)
 
         for epsilon in [1e-5, 1e-6]:
-            AllReduceRMSNORMPattern(
+            AllReduceFusedRMSNormStaticQuantFP8Pattern(
+                epsilon,
+                self.model_dtype,
+                self.device,
+                self.allreduce_params,
+            ).register(self.patterns)
+            AllReduceFusedAddRMSNormStaticQuantFP8Pattern(
+                epsilon,
+                self.model_dtype,
+                self.device,
+                self.allreduce_params,
+            ).register(self.patterns)
+            if current_platform.has_device_capability(100):
+                AllReduceFusedRMSNormStaticQuantNVFP4Pattern(
+                    epsilon,
+                    self.model_dtype,
+                    self.device,
+                    self.allreduce_params,
+                ).register(self.patterns)
+                AllReduceFusedAddRMSNormStaticQuantNVFP4Pattern(
+                    epsilon,
+                    self.model_dtype,
+                    self.device,
+                    self.allreduce_params,
+                ).register(self.patterns)
+            AllReduceRMSNormPattern(
                 epsilon,
                 self.model_dtype,
                 self.device,
@@ -707,6 +1146,10 @@ class AllReduceFusionPass(VllmInductorPass):
                 self.allreduce_params,
             ).register(self.patterns)
 
+            # WARNING: This is a hack to clear the pattern matcher cache
+            # and allow multiple values of epsilon.
+            torch._inductor.pattern_matcher._seen_patterns.clear()
+
         self.disabled = False
 
     def __call__(self, graph: fx.Graph):
@@ -723,5 +1166,5 @@ class AllReduceFusionPass(VllmInductorPass):
         if self.disabled:
             return
         if flashinfer_comm is not None:
-            flashinfer_comm.trtllm_destroy_ipc_workspace(
+            flashinfer_comm.trtllm_destroy_ipc_workspace_for_all_reduce(
                 self.ipc_handles, self.group)
diff --git a/vllm/config.py b/vllm/config.py
index 27dde5f1b1f6f..edad5dd0406bf 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -4051,7 +4051,7 @@ class PassConfig:
     """Whether to enable async TP."""
     enable_fi_allreduce_fusion: bool = False
     """Whether to enable flashinfer allreduce fusion."""
-    fi_allreduce_fusion_max_token_num: int = 1024
+    fi_allreduce_fusion_max_token_num: int = 16384
     """Max number of tokens to used in flashinfer allreduce fusion."""
 
     # TODO(luka) better pass enabling system.

From c3e0e9337ef0af04d2d18b263a6a0f7deed75856 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Thu, 31 Jul 2025 18:26:11 -0400
Subject: [PATCH 100/224] [Feature] Add Flashinfer MoE Support for Compressed
 Tensor NVFP4 (#21639)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 .../compressed_tensors_moe.py                 |  53 +++++-
 .../layers/quantization/modelopt.py           | 150 +++--------------
 .../quantization/utils/flashinfer_fp4_moe.py  | 154 ++++++++++++++++++
 .../quantization/utils/nvfp4_moe_support.py   |  59 +++++++
 4 files changed, 287 insertions(+), 129 deletions(-)
 create mode 100644 vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
 create mode 100644 vllm/model_executor/layers/quantization/utils/nvfp4_moe_support.py

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index 17b41e8a1c23c..09d8890888fa8 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -17,9 +17,14 @@ from vllm.model_executor.layers.fused_moe import (
     FusedMoE, FusedMoEActivationFormat, FusedMoEConfig, FusedMoEMethodBase,
     FusedMoEPermuteExpertsUnpermute, FusedMoEPrepareAndFinalize,
     FusedMoeWeightScaleSupported)
+from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import (  # noqa
+    FlashInferCutlassMoEPrepareAndFinalize)
 from vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import (  # noqa
     WNA16_SUPPORTED_BITS, WNA16_SUPPORTED_TYPES_MAP)
 from vllm.model_executor.layers.quantization.utils import replace_parameter
+from vllm.model_executor.layers.quantization.utils.flashinfer_fp4_moe import (
+    build_flashinfer_fp4_cutlass_moe_kernel,
+    flashinfer_fp4_cutlass_moe_forward, reorder_w1w3_to_w3w1)
 from vllm.model_executor.layers.quantization.utils.marlin_utils import (
     check_moe_marlin_supports_layer, marlin_make_workspace_new,
     marlin_moe_permute_scales)
@@ -28,7 +33,7 @@ from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
 from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
     prepare_moe_fp8_layer_for_marlin)
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    cutlass_fp4_supported, swizzle_blockscale)
+    swizzle_blockscale)
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     all_close_1d, normalize_e4m3fn_to_e4m3fnuz, per_tensor_dequantize)
 from vllm.model_executor.utils import set_weight_attrs
@@ -96,8 +101,14 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
 class CompressedTensorsW4A4MoeMethod(CompressedTensorsMoEMethod):
 
     def __init__(self):
-        self.use_marlin = not cutlass_fp4_supported()
+        from vllm.model_executor.layers.quantization.utils.nvfp4_moe_support import (  # noqa: E501
+            detect_nvfp4_moe_support)
+        _nvfp4 = detect_nvfp4_moe_support(self.__class__.__name__)
+        self.cutlass_nvfp4_supported = _nvfp4.cutlass_supported
+        self.allow_flashinfer_cutlass = _nvfp4.allow_flashinfer_cutlass
+        self.use_marlin = _nvfp4.use_marlin
         self.group_size = 16
+        self.fused_experts = None  # type: ignore[assignment]
 
     def create_weights(self, layer: torch.nn.Module, num_experts: int,
                        hidden_size: int, intermediate_size_per_partition: int,
@@ -200,6 +211,14 @@ class CompressedTensorsW4A4MoeMethod(CompressedTensorsMoEMethod):
         layer.w2_weight = torch.nn.Parameter(layer.w2_weight_packed.data,
                                              requires_grad=False)
 
+        # reorder GEMM1 weights and block scales for FlashInfer CUTLASS kernel.
+        if self.allow_flashinfer_cutlass:
+            w, s = reorder_w1w3_to_w3w1(layer.w13_weight.data,
+                                        layer.w13_weight_scale.data,
+                                        dim=-2)
+            layer.w13_weight = torch.nn.Parameter(w, requires_grad=False)
+            layer.w13_weight_scale = torch.nn.Parameter(s, requires_grad=False)
+
         if not torch.allclose(layer.w13_weight_global_scale[:, 0],
                               layer.w13_weight_global_scale[:, 1]):
             logger.warning_once(
@@ -246,6 +265,21 @@ class CompressedTensorsW4A4MoeMethod(CompressedTensorsMoEMethod):
         layer.w2_input_scale_quant = torch.nn.Parameter(
             (layer.w2_input_global_scale), requires_grad=False)
 
+    def maybe_swap_experts_impl(self, moe_parallel_config):
+        if not self.allow_flashinfer_cutlass:
+            return
+        self.fused_experts = build_flashinfer_fp4_cutlass_moe_kernel(
+            moe_parallel_config)
+
+    def select_gemm_impl(self, prepare_finalize, moe):
+        """Return the appropriate GEMM experts implementation."""
+        assert moe is not None and prepare_finalize is not None
+        from vllm.model_executor.layers.quantization.utils.flashinfer_fp4_moe import (  # noqa: E501
+            select_nvfp4_gemm_impl)
+
+        return select_nvfp4_gemm_impl(self.allow_flashinfer_cutlass, moe,
+                                      logger)
+
     def apply(
         self,
         layer: torch.nn.Module,
@@ -303,10 +337,23 @@ class CompressedTensorsW4A4MoeMethod(CompressedTensorsMoEMethod):
                 global_num_experts=global_num_experts,
                 expert_map=expert_map)
 
+        # FlashInfer fused experts path
+        if self.fused_experts is not None:
+            return flashinfer_fp4_cutlass_moe_forward(
+                self.fused_experts,
+                layer,
+                x,
+                topk_weights,
+                topk_ids,
+                activation=activation,
+                global_num_experts=global_num_experts,
+                expert_map=expert_map,
+                apply_router_weight_on_input=apply_router_weight_on_input,
+            )
+
         assert expert_map is None, ("Expert Parallelism / expert_map "
                                     "is currently not supported for "
                                     "CompressedTensorsW4A4MoeMethod.")
-
         from vllm.model_executor.layers.fused_moe.cutlass_moe import (
             cutlass_moe_fp4)
 
diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py
index b8ffcf90c022b..0334a2824512d 100644
--- a/vllm/model_executor/layers/quantization/modelopt.py
+++ b/vllm/model_executor/layers/quantization/modelopt.py
@@ -10,11 +10,8 @@ from torch.nn.parameter import Parameter
 import vllm.envs as envs
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
-from vllm.distributed import get_ep_group
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEParallelConfig
-from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import (  # noqa: E501
-    FlashInferCutlassMoEPrepareAndFinalize)
 from vllm.model_executor.layers.fused_moe.layer import (
     FusedMoE, FusedMoEMethodBase, FusedMoeWeightScaleSupported)
 from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
@@ -23,6 +20,9 @@ from vllm.model_executor.layers.quantization import QuantizationMethods
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig, QuantizeMethodBase)
 from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
+from vllm.model_executor.layers.quantization.utils.flashinfer_fp4_moe import (
+    build_flashinfer_fp4_cutlass_moe_kernel,
+    flashinfer_fp4_cutlass_moe_forward, reorder_w1w3_to_w3w1)
 from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
     apply_flashinfer_per_tensor_scale_fp8, rotate_flashinfer_fp8_moe_weights,
     swap_w13_to_w31)
@@ -35,7 +35,6 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     Fp8LinearOp, requantize_with_max_scale)
 from vllm.model_executor.parameter import (ModelWeightParameter,
                                            PerTensorScaleParameter)
-from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
 from vllm.utils.flashinfer import has_flashinfer_moe
 
@@ -869,28 +868,12 @@ class ModelOptNvFp4FusedMoE(FusedMoEMethodBase):
 
     def __init__(self, quant_config: ModelOptNvFp4Config):
         self.quant_config = quant_config
-        self.cutlass_nvfp4_supported = cutlass_fp4_supported()
-        self.use_marlin = False
-        self.allow_flashinfer_cutlass = False
-
-        if envs.VLLM_USE_FLASHINFER_MOE_FP4:
-            if self.cutlass_nvfp4_supported and current_platform.is_cuda() \
-               and current_platform.is_device_capability(100):
-                logger.info_once(
-                    "Using FlashInfer kernels for ModelOptNvFp4FusedMoE.")
-                self.allow_flashinfer_cutlass = True
-            else:
-                logger.warning_once(
-                    "Flashinfer CUTLASS Fused MoE not supported "
-                    "or found on the current platform.")
-
-        if not self.cutlass_nvfp4_supported:
-            if is_fp4_marlin_supported():
-                self.use_marlin = True
-            else:
-                raise ValueError("Current platform does not support NVFP4"
-                                 " quantization. Please use Blackwell and"
-                                 " above.")
+        from vllm.model_executor.layers.quantization.utils.nvfp4_moe_support import (  # noqa: E501
+            detect_nvfp4_moe_support)
+        _nvfp4 = detect_nvfp4_moe_support(self.__class__.__name__)
+        self.cutlass_nvfp4_supported = _nvfp4.cutlass_supported
+        self.allow_flashinfer_cutlass = _nvfp4.allow_flashinfer_cutlass
+        self.use_marlin = _nvfp4.use_marlin
 
         self.fused_experts = None  # type: ignore
 
@@ -900,29 +883,8 @@ class ModelOptNvFp4FusedMoE(FusedMoEMethodBase):
     ):
         if not self.allow_flashinfer_cutlass:
             return
-
-        logger.debug_once("FlashInferExperts")
-        # default to TP/EP case only
-
-        experts_kwargs: dict[str, Any] = {
-            "use_nvfp4_w4a4": True,
-            "use_dp": moe_parallel_config.dp_size > 1,
-            "ep_rank": moe_parallel_config.ep_rank,
-            "ep_size": moe_parallel_config.ep_size,
-            "tp_rank": moe_parallel_config.tp_rank,
-            "tp_size": moe_parallel_config.tp_size,
-        }
-
-        from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (  # noqa: E501
-            FlashInferExperts)
-        experts = FlashInferExperts(**experts_kwargs)
-        self.fused_experts = mk.FusedMoEModularKernel(
-            FlashInferCutlassMoEPrepareAndFinalize(
-                quant_dtype=torch.uint8,
-                #meaning 2x e2m1 packed in one, kernel requirement
-            ),
-            experts,
-        )
+        self.fused_experts = build_flashinfer_fp4_cutlass_moe_kernel(
+            moe_parallel_config)
 
     # This method update self.fused_experts
     # only prepare_finalize is not None call select_gemm_impl
@@ -931,32 +893,12 @@ class ModelOptNvFp4FusedMoE(FusedMoEMethodBase):
     def select_gemm_impl(self, prepare_finalize,
                          moe) -> mk.FusedMoEPermuteExpertsUnpermute:
 
-        assert moe is not None
-        assert prepare_finalize is not None
-        experts = None
-        all2all_manager = get_ep_group().device_communicator.all2all_manager
-        assert all2all_manager is not None
-        if self.allow_flashinfer_cutlass:
-            from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (  # noqa: E501
-                FlashInferExperts)
-            logger.debug_once("Using FlashInferExperts")
-            experts = FlashInferExperts(
-                use_nvfp4_w4a4=True,
-                use_dp=moe.moe_parallel_config.dp_size > 1,
-                ep_rank=moe.moe_parallel_config.ep_rank,
-                ep_size=moe.moe_parallel_config.ep_size,
-                tp_rank=moe.moe_parallel_config.tp_rank,
-                tp_size=moe.moe_parallel_config.tp_size,
-            )
-        else:
-            assert moe.dp_size > 1
-            logger.debug_once("Using CutlassExpertsFp4")
-            # Currently CutlassExpertsFp4 doesn't support DP
-            raise ValueError("CutlassExpertsFp4 doesn't support DP. "
-                             "Use flashinfer CUTLASS FusedMoE backend instead "
-                             "(set VLLM_USE_FLASHINFER_MOE_FP4=1)")
+        assert moe is not None and prepare_finalize is not None
+        from vllm.model_executor.layers.quantization.utils.flashinfer_fp4_moe import (  # noqa: E501
+            select_nvfp4_gemm_impl)
 
-        return experts
+        return select_nvfp4_gemm_impl(self.allow_flashinfer_cutlass, moe,
+                                      logger)
 
     def uses_weight_scale_2_pattern(self) -> bool:
         """
@@ -1062,18 +1004,8 @@ class ModelOptNvFp4FusedMoE(FusedMoEMethodBase):
         gemm1_weight_scale = layer.w13_weight_scale.data
 
         if self.allow_flashinfer_cutlass:
-            dim = -2
-            size = gemm1_weight.size(dim)
-            assert size % 2 == 0, f"Expected even size in dim {dim}, got {size}"
-            half = size // 2
-
-            # Reorder weight
-            w1, w3 = gemm1_weight.split(half, dim=dim)
-            gemm1_weight = torch.cat([w3, w1], dim=dim).contiguous()
-
-            # Reorder scale
-            s1, s3 = gemm1_weight_scale.split(half, dim=dim)
-            gemm1_weight_scale = torch.cat([s3, s1], dim=dim).contiguous()
+            gemm1_weight, gemm1_weight_scale = reorder_w1w3_to_w3w1(
+                gemm1_weight, gemm1_weight_scale, dim=-2)
 
         layer.w13_weight = Parameter(gemm1_weight, requires_grad=False)
         layer.w13_weight_scale = Parameter(gemm1_weight_scale,
@@ -1217,49 +1149,15 @@ class ModelOptNvFp4FusedMoE(FusedMoEMethodBase):
                 expert_map=expert_map,
                 apply_router_weight_on_input=apply_router_weight_on_input)
         else:
-            # TP or DP case
-            from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (  # noqa: E501
-                is_valid_flashinfer_cutlass_fused_moe)
-            assert is_valid_flashinfer_cutlass_fused_moe(
-                x, layer.w13_weight, layer.w2_weight), (
-                    "Flashinfer CUTLASS Fused MoE not applicable!")
-
-            a1_gscale = layer.w13_input_scale_quant
-            a2_gscale = layer.w2_input_scale_quant
-            extra_expert_args = {
-                'g1_alphas': layer.g1_alphas,
-                'g2_alphas': layer.g2_alphas,
-                'out_dtype': x.dtype,
-                # Avoid confusion with a1_scale and a2_scale
-                # where are batch size related.
-                'a1_gscale': a1_gscale,
-                'a2_gscale': a2_gscale,
-            }
-            extra_prepare_args = {
-                'use_dp': layer.dp_size > 1,
-                'local_tokens': x.shape[0],
-                'a1_gscale': a1_gscale,
-            }
-            extra_finalize_args = {
-                'use_dp': layer.dp_size > 1,
-                'local_tokens': x.shape[0],
-            }
-
-            out = self.fused_experts(
-                hidden_states=x,
-                w1=layer.w13_weight,
-                w2=layer.w2_weight,
-                topk_weights=topk_weights,
-                topk_ids=topk_ids,
-                inplace=False,  # TODO(shuw): fix later, now output is high prec
+            out = flashinfer_fp4_cutlass_moe_forward(
+                self.fused_experts,
+                layer,
+                x,
+                topk_weights,
+                topk_ids,
                 activation=activation,
                 global_num_experts=global_num_experts,
                 expert_map=expert_map,
-                w1_scale=layer.w13_blockscale_swizzled,
-                w2_scale=layer.w2_blockscale_swizzled,
                 apply_router_weight_on_input=apply_router_weight_on_input,
-                extra_expert_args=extra_expert_args,
-                extra_prepare_args=extra_prepare_args,
-                extra_finalize_args=extra_finalize_args,
             )
         return out
diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
new file mode 100644
index 0000000000000..4c617e226041f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Utility helpers for NVFP4 + FlashInfer fused-MoE path"""
+from __future__ import annotations
+
+from typing import Optional
+
+import torch
+
+import vllm.envs as envs
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.config import FusedMoEParallelConfig
+from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
+    FlashInferExperts, is_valid_flashinfer_cutlass_fused_moe)
+from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import (  # noqa: E501
+    FlashInferCutlassMoEPrepareAndFinalize)
+from vllm.platforms import current_platform
+
+logger = init_logger(__name__)
+
+__all__ = [
+    "is_flashinfer_fp4_cutlass_moe_available",
+    "reorder_w1w3_to_w3w1",
+    "build_flashinfer_fp4_cutlass_moe_kernel",
+    "flashinfer_fp4_cutlass_moe_forward",
+]
+
+
+def is_flashinfer_fp4_cutlass_moe_available() -> bool:
+    """Return ``True`` when FlashInfer CUTLASS NV-FP4 kernels can be used."""
+    return (envs.VLLM_USE_FLASHINFER_MOE_FP4 and current_platform.is_cuda()
+            and current_platform.is_device_capability(100))
+
+
+def reorder_w1w3_to_w3w1(weight: torch.Tensor,
+                         scale: torch.Tensor,
+                         dim: int = -2) -> tuple[torch.Tensor, torch.Tensor]:
+    """Re-order the concatenated `[w1, w3]` tensors to `[w3, w1]`"""
+    size = weight.size(dim)
+    assert size % 2 == 0, f"Expected even size in dim {dim}, got {size}"
+    half = size // 2
+
+    w1, w3 = weight.split(half, dim=dim)
+    s1, s3 = scale.split(half, dim=dim)
+
+    return (torch.cat([w3, w1],
+                      dim=dim).contiguous(), torch.cat([s3, s1],
+                                                       dim=dim).contiguous())
+
+
+def build_flashinfer_fp4_cutlass_moe_kernel(
+    moe_parallel_config: FusedMoEParallelConfig, ) -> mk.FusedMoEModularKernel:
+    """Create *and return* a FlashInfer CUTLASS fused-MoE modular kernel"""
+    experts = FlashInferExperts(
+        use_nvfp4_w4a4=True,
+        use_dp=moe_parallel_config.dp_size > 1,
+        ep_rank=moe_parallel_config.ep_rank,
+        ep_size=moe_parallel_config.ep_size,
+        tp_rank=moe_parallel_config.tp_rank,
+        tp_size=moe_parallel_config.tp_size,
+    )
+    logger.debug_once("FlashInferExperts (util)")
+    return mk.FusedMoEModularKernel(
+        FlashInferCutlassMoEPrepareAndFinalize(quant_dtype=torch.uint8),
+        experts,
+    )
+
+
+def flashinfer_fp4_cutlass_moe_forward(
+    fused_experts: mk.FusedMoEModularKernel,
+    layer: torch.nn.Module,
+    x: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    activation: str,
+    global_num_experts: int,
+    expert_map: Optional[torch.Tensor],
+    apply_router_weight_on_input: bool,
+) -> torch.Tensor:
+    """Common forward wrapper for FlashInfer NV-FP4 fused-MoE"""
+
+    assert is_valid_flashinfer_cutlass_fused_moe(
+        x, layer.w13_weight,
+        layer.w2_weight), ("FlashInfer CUTLASS fused-MoE not applicable!")
+
+    a1_gscale = layer.w13_input_scale_quant
+    a2_gscale = layer.w2_input_scale_quant
+
+    extra_expert_args = {
+        "g1_alphas": layer.g1_alphas,
+        "g2_alphas": layer.g2_alphas,
+        # Avoid confusion with a1_scale and a2_scale,
+        # which are batch-size related.
+        "a1_gscale": a1_gscale,
+        "a2_gscale": a2_gscale,
+        "out_dtype": x.dtype,
+    }
+    extra_prepare_args = {
+        "use_dp": layer.dp_size > 1,
+        "local_tokens": x.shape[0],
+        "a1_gscale": a1_gscale,
+    }
+    extra_finalize_args = {
+        "use_dp": layer.dp_size > 1,
+        "local_tokens": x.shape[0],
+    }
+
+    return fused_experts(
+        hidden_states=x,
+        w1=layer.w13_weight,
+        w2=layer.w2_weight,
+        topk_weights=topk_weights,
+        topk_ids=topk_ids,
+        inplace=False,  # TODO(shuw): fix later, now output is high prec
+        activation=activation,
+        global_num_experts=global_num_experts,
+        expert_map=expert_map,
+        w1_scale=layer.w13_blockscale_swizzled,
+        w2_scale=layer.w2_blockscale_swizzled,
+        apply_router_weight_on_input=apply_router_weight_on_input,
+        extra_expert_args=extra_expert_args,
+        extra_prepare_args=extra_prepare_args,
+        extra_finalize_args=extra_finalize_args,
+    )
+
+
+def select_nvfp4_gemm_impl(
+        allow_flashinfer_cutlass: bool,
+        moe,  # FusedMoEConfig
+        logger):
+    """Return a GEMM *experts* implementation for NV-FP4 fused-MoE layers"""
+
+    # lazy import
+    from vllm.distributed import get_ep_group
+
+    all2all_manager = get_ep_group().device_communicator.all2all_manager
+    assert all2all_manager is not None
+
+    if allow_flashinfer_cutlass:
+        logger.debug_once("Using FlashInferExperts")
+        return FlashInferExperts(
+            use_nvfp4_w4a4=True,
+            use_dp=moe.moe_parallel_config.dp_size > 1,
+            ep_rank=moe.moe_parallel_config.ep_rank,
+            ep_size=moe.moe_parallel_config.ep_size,
+            tp_rank=moe.moe_parallel_config.tp_rank,
+            tp_size=moe.moe_parallel_config.tp_size,
+        )
+
+    # native cutlass experts currently don't support DP; TP case won't call this
+    raise ValueError(
+        "CutlassExpertsFp4 doesn't support DP. Use flashinfer CUTLASS "
+        "Fused MoE backend instead (set VLLM_USE_FLASHINFER_MOE_FP4=1)")
diff --git a/vllm/model_executor/layers/quantization/utils/nvfp4_moe_support.py b/vllm/model_executor/layers/quantization/utils/nvfp4_moe_support.py
new file mode 100644
index 0000000000000..23a749467f193
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/nvfp4_moe_support.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
+
+import vllm.envs as envs
+from vllm.logger import init_logger
+from vllm.model_executor.layers.quantization.utils.flashinfer_fp4_moe import (
+    is_flashinfer_fp4_cutlass_moe_available)
+from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
+    is_fp4_marlin_supported)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    cutlass_fp4_supported)
+
+__all__ = ["detect_nvfp4_moe_support", "NvFp4Support"]
+
+_logger = init_logger(__name__)
+
+
+@dataclass(frozen=True)
+class NvFp4Support:
+    """Result container for NV-FP4 capability probing."""
+
+    cutlass_supported: bool
+    allow_flashinfer_cutlass: bool
+    use_marlin: bool
+
+
+def detect_nvfp4_moe_support(class_name: str = "") -> NvFp4Support:
+    """Detect platform support for NV-FP4 fused-MoE path"""
+    cutlass_supported = cutlass_fp4_supported()
+
+    allow_flashinfer = (cutlass_supported
+                        and is_flashinfer_fp4_cutlass_moe_available())
+
+    if allow_flashinfer:
+        _logger.info_once("Using FlashInfer kernels for %s.", class_name
+                          or "NVFP4 path")
+    else:
+        if envs.VLLM_USE_FLASHINFER_MOE_FP4:
+            _logger.warning_once(
+                "FlashInfer kernels unavailable for %s on current platform.",
+                class_name or "NVFP4 path",
+            )
+
+    use_marlin = False
+    if not cutlass_supported:
+        if is_fp4_marlin_supported():
+            use_marlin = True
+            _logger.info_once("Falling back to Marlin FP4 MoE kernel.")
+        else:
+            raise ValueError(
+                "Current platform does not support NVFP4 quantization. "
+                "Please use Blackwell GPUs or enable FlashInfer.")
+
+    return NvFp4Support(
+        cutlass_supported=cutlass_supported,
+        allow_flashinfer_cutlass=allow_flashinfer,
+        use_marlin=use_marlin,
+    )

From e360316ab9902ecfc564710ae4b1539db867efd9 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni <mbonanni001@gmail.com>
Date: Thu, 31 Jul 2025 21:01:55 -0400
Subject: [PATCH 101/224] Add DeepGEMM to Dockerfile in vllm-base image
 (#21533)

Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
---
 docker/Dockerfile                             | 30 +++++++++++++++++--
 tests/kernels/moe/test_deepep_deepgemm_moe.py |  5 ++--
 tests/kernels/moe/test_deepgemm.py            |  6 ++--
 vllm/utils/deep_gemm.py                       | 12 ++++++++
 4 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 69aeee67a4300..413151b3edb00 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,4 +1,3 @@
-
 # The vLLM Dockerfile is used to construct vLLM image that can be directly used
 # to run the OpenAI compatible server.
 
@@ -16,6 +15,7 @@ ARG PYTHON_VERSION=3.12
 # Example:
 # docker build --build-arg BUILD_BASE_IMAGE=registry.acme.org/mirror/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
 ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
+# TODO: Restore to base image after FlashInfer AOT wheel fixed
 ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
 
 # By parameterizing the Deadsnakes repository URL, we allow third-party to use
@@ -289,7 +289,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 
 #################### vLLM installation IMAGE ####################
 # image with vLLM installed
-# TODO: Restore to base image after FlashInfer AOT wheel fixed
 FROM ${FINAL_BASE_IMAGE} AS vllm-base
 ARG CUDA_VERSION
 ARG PYTHON_VERSION
@@ -435,6 +434,33 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -r requirements/build.txt \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
+# Install DeepGEMM from source
+ARG DEEPGEMM_GIT_REPO="https://github.com/deepseek-ai/DeepGEMM.git"
+ARG DEEPGEMM_GIT_REF="187656694f7f69e3e7975617a68bc3387680a7e1"
+RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
+  . /etc/environment
+    CUDA_MAJOR="${CUDA_VERSION%%.*}"
+    CUDA_MINOR="${CUDA_VERSION#${CUDA_MAJOR}.}"
+    CUDA_MINOR="${CUDA_MINOR%%.*}"
+    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 8 ]; }; then  # CUDA >= 12.8, incl. newer majors such as 13.0
+        git clone --recursive --shallow-submodules \
+            ${DEEPGEMM_GIT_REPO} deepgemm
+        echo "🏗️  Building DeepGEMM"
+        pushd deepgemm
+            git checkout ${DEEPGEMM_GIT_REF}
+            # Build DeepGEMM
+            # (Based on https://github.com/deepseek-ai/DeepGEMM/blob/main/install.sh)
+            rm -rf build dist
+            rm -rf *.egg-info
+            python3 setup.py bdist_wheel
+            uv pip install --system dist/*.whl
+        popd
+        rm -rf deepgemm
+    else
+        echo "Skipping DeepGEMM installation (requires CUDA 12.8+ but got ${CUDA_VERSION})"
+    fi
+BASH
+
 #################### vLLM installation IMAGE ####################
 
 #################### TEST IMAGE ####################
diff --git a/tests/kernels/moe/test_deepep_deepgemm_moe.py b/tests/kernels/moe/test_deepep_deepgemm_moe.py
index 074771e49a061..266f1161a684b 100644
--- a/tests/kernels/moe/test_deepep_deepgemm_moe.py
+++ b/tests/kernels/moe/test_deepep_deepgemm_moe.py
@@ -20,7 +20,8 @@ from vllm.model_executor.layers.fused_moe.modular_kernel import (
     FusedMoEModularKernel)
 from vllm.platforms import current_platform
 from vllm.utils import has_deep_ep, has_deep_gemm
-from vllm.utils.deep_gemm import is_blackwell_deep_gemm_used
+from vllm.utils.deep_gemm import (is_blackwell_deep_gemm_used,
+                                  is_deep_gemm_supported)
 
 from .parallel_utils import ProcessGroupInfo, parallel_launch
 from .utils import make_test_weights
@@ -46,7 +47,7 @@ requires_deep_ep = pytest.mark.skipif(
 )
 
 requires_deep_gemm = pytest.mark.skipif(
-    not has_deep_gemm(),
+    not is_deep_gemm_supported(),
     reason="Requires deep_gemm kernels",
 )
 
diff --git a/tests/kernels/moe/test_deepgemm.py b/tests/kernels/moe/test_deepgemm.py
index f7578e226917d..759d2814eefb9 100644
--- a/tests/kernels/moe/test_deepgemm.py
+++ b/tests/kernels/moe/test_deepgemm.py
@@ -15,13 +15,13 @@ import torch
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     per_token_group_quant_fp8)
-from vllm.utils import has_deep_gemm
-from vllm.utils.deep_gemm import calc_diff, per_block_cast_to_fp8
+from vllm.utils.deep_gemm import (calc_diff, is_deep_gemm_supported,
+                                  per_block_cast_to_fp8)
 
 BLOCK_SIZE = [128, 128]
 
 requires_deep_gemm = pytest.mark.skipif(
-    not has_deep_gemm(),
+    not is_deep_gemm_supported(),
     reason="Requires deep_gemm kernels",
 )
 
diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py
index 169b083017e46..a49a59bd81253 100644
--- a/vllm/utils/deep_gemm.py
+++ b/vllm/utils/deep_gemm.py
@@ -17,6 +17,17 @@ from vllm.platforms import current_platform
 from vllm.utils import has_deep_gemm
 
 
+@functools.cache
+def is_deep_gemm_supported() -> bool:
+    """Return ``True`` if DeepGEMM is supported on the current platform.
+    Currently, only Hopper and Blackwell GPUs are supported.
+    """
+    supported_arch = current_platform.is_cuda() and (
+        current_platform.is_device_capability(90)
+        or current_platform.is_device_capability(100))
+    return has_deep_gemm() and supported_arch
+
+
 @functools.cache
 def is_blackwell_deep_gemm_used() -> bool:
     """Return ``True`` if vLLM is configured to use DeepGEMM on a
@@ -142,4 +153,5 @@ __all__ = [
     "fp8_m_grouped_gemm_nt_masked",
     "per_block_cast_to_fp8",
     "is_blackwell_deep_gemm_used",
+    "is_deep_gemm_supported",
 ]

From 0bd409cf01c37bbc99a5d3c70c4954da2113aba8 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Thu, 31 Jul 2025 21:02:11 -0400
Subject: [PATCH 102/224] Move flashinfer-python to optional extra
 `vllm[flashinfer]` (#21959)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 requirements/cuda.txt | 4 +---
 setup.py              | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/requirements/cuda.txt b/requirements/cuda.txt
index 5557c868acafa..75008dc20df48 100644
--- a/requirements/cuda.txt
+++ b/requirements/cuda.txt
@@ -11,6 +11,4 @@ torchaudio==2.7.1
 # These must be updated alongside torch
 torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
-xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
-# FlashInfer should be updated together with the Dockerfile
-flashinfer_python==0.2.9rc2
\ No newline at end of file
+xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6d615d122d69e..bfa195d4395f0 100644
--- a/setup.py
+++ b/setup.py
@@ -671,7 +671,9 @@ setup(
         ["runai-model-streamer >= 0.13.3", "runai-model-streamer-s3", "boto3"],
         "audio": ["librosa", "soundfile",
                   "mistral_common[audio]"],  # Required for audio processing
-        "video": []  # Kept for backwards compatibility
+        "video": [],  # Kept for backwards compatibility
+        # FlashInfer should be updated together with the Dockerfile
+        "flashinfer": ["flashinfer-python==0.2.9rc2"],
     },
     cmdclass=cmdclass,
     package_data=package_data,

From 37006420134fdd771b474bda32516cde209e0f4c Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Thu, 31 Jul 2025 21:13:27 -0400
Subject: [PATCH 103/224] [Refactor] Remove Duplicate `per_block_cast_to_fp8`,
 Remove Dependencies of DeepGEMM (#21787)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 .../benchmark_fp8_block_dense_gemm.py         | 45 ++-------------
 .../kernels/moe/modular_kernel_tools/utils.py | 31 +---------
 .../kernels/moe/test_cutlass_grouped_gemm.py  | 21 +------
 tests/kernels/moe/test_deepgemm.py            |  8 ++-
 tests/kernels/moe/utils.py                    |  4 +-
 tests/kernels/quant_utils.py                  | 19 -------
 tests/kernels/quantization/test_block_fp8.py  |  2 +-
 vllm/utils/deep_gemm.py                       | 57 ++++++++++++-------
 8 files changed, 55 insertions(+), 132 deletions(-)

diff --git a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py
index 43c54d56ca8c1..b99c2099f2c38 100644
--- a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py
+++ b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py
@@ -4,49 +4,16 @@
 # ruff: noqa: E501
 import time
 
-# Import DeepGEMM functions
-import deep_gemm
 import torch
-from deep_gemm import calc_diff, ceil_div, get_col_major_tma_aligned_tensor
 
-# Import vLLM functions
 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    get_col_major_tma_aligned_tensor,
     per_token_group_quant_fp8,
     w8a8_block_fp8_matmul,
 )
 from vllm.triton_utils import triton
-
-
-# Copied from
-# https://github.com/deepseek-ai/DeepGEMM/blob/78cacf70d41d15d688bd493ebc85845f7f2a3d5d/tests/test_core.py#L9
-def per_token_cast_to_fp8(
-        x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-    """Convert tensor to FP8 format with per-token scaling."""
-    assert x.dim() == 2 and x.size(1) % 128 == 0
-    m, n = x.shape
-    x_view = x.view(m, -1, 128)
-    x_amax = x_view.abs().float().amax(dim=2).view(m, -1).clamp(1e-4)
-    return (x_view * (448.0 / x_amax.unsqueeze(2))).to(
-        torch.float8_e4m3fn).view(m, n), (x_amax / 448.0).view(m, -1)
-
-
-# Copied from
-# https://github.com/deepseek-ai/DeepGEMM/blob/78cacf70d41d15d688bd493ebc85845f7f2a3d5d/tests/test_core.py#L17
-def per_block_cast_to_fp8(
-        x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-    """Convert tensor to FP8 format with per-block scaling."""
-    assert x.dim() == 2
-    m, n = x.shape
-    x_padded = torch.zeros((ceil_div(m, 128) * 128, ceil_div(n, 128) * 128),
-                           dtype=x.dtype,
-                           device=x.device)
-    x_padded[:m, :n] = x
-    x_view = x_padded.view(-1, 128, x_padded.size(1) // 128, 128)
-    x_amax = x_view.abs().float().amax(dim=(1, 3), keepdim=True).clamp(1e-4)
-    x_scaled = (x_view * (448.0 / x_amax)).to(torch.float8_e4m3fn)
-    return x_scaled.view_as(x_padded)[:m, :n].contiguous(), (
-        x_amax / 448.0).view(x_view.size(0), x_view.size(2))
+from vllm.utils.deep_gemm import calc_diff, fp8_gemm_nt, per_block_cast_to_fp8
 
 
 def benchmark_shape(m: int,
@@ -69,14 +36,14 @@ def benchmark_shape(m: int,
 
     # Pre-quantize B for all implementations
     # (weights can be pre-quantized offline)
-    B_deepgemm, B_scale_deepgemm = per_block_cast_to_fp8(B)
-    B_vllm, B_scale_vllm = per_block_cast_to_fp8(B)
+    B_deepgemm, B_scale_deepgemm = per_block_cast_to_fp8(B, [128, 128], use_ue8m0=True)
+    B_vllm, B_scale_vllm = per_block_cast_to_fp8(B, [128, 128], use_ue8m0=True)
 
     # Block size configuration
     block_size = [128, 128]
 
     # Pre-quantize A for all implementations
-    A_deepgemm, A_scale_deepgemm = per_token_cast_to_fp8(A)
+    A_deepgemm, A_scale_deepgemm = per_token_group_quant_fp8(A, block_size[1])
     A_scale_deepgemm = get_col_major_tma_aligned_tensor(A_scale_deepgemm)
     C_deepgemm = torch.empty((m, n), device='cuda', dtype=torch.bfloat16)
     A_vllm, A_scale_vllm = per_token_group_quant_fp8(A, block_size[1])
@@ -85,7 +52,7 @@ def benchmark_shape(m: int,
 
     # === DeepGEMM Implementation ===
     def deepgemm_gemm():
-        deep_gemm.gemm_fp8_fp8_bf16_nt((A_deepgemm, A_scale_deepgemm),
+        fp8_gemm_nt((A_deepgemm, A_scale_deepgemm),
                                        (B_deepgemm, B_scale_deepgemm),
                                        C_deepgemm)
         return C_deepgemm
diff --git a/tests/kernels/moe/modular_kernel_tools/utils.py b/tests/kernels/moe/modular_kernel_tools/utils.py
index 09bb4a34f3189..866f52882beee 100644
--- a/tests/kernels/moe/modular_kernel_tools/utils.py
+++ b/tests/kernels/moe/modular_kernel_tools/utils.py
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import math
 
 import torch
 
 import vllm._custom_ops as ops
+from vllm.utils.deep_gemm import per_block_cast_to_fp8
 
 
 def per_token_cast_to_fp8(
@@ -20,29 +20,6 @@ def per_token_cast_to_fp8(
     return fp8_data.view(m, n + pad_size)[:, :n], (x_amax / 448.0).view(m, -1)
 
 
-def per_block_cast_to_fp8(
-        x: torch.Tensor, block_size_k: int,
-        block_size_n: int) -> tuple[torch.Tensor, torch.Tensor]:
-    assert x.dim() == 2
-    m, n = x.shape
-    x_padded = torch.zeros(
-        (
-            int(math.ceil(m / block_size_k)) * block_size_k,
-            int(math.ceil(n / block_size_n)) * block_size_n,
-        ),
-        dtype=x.dtype,
-        device=x.device,
-    )
-    x_padded[:m, :n] = x
-    x_view = x_padded.view(-1, block_size_k,
-                           x_padded.size(1) // block_size_k, block_size_n)
-    x_amax = x_view.abs().float().amax(dim=(1, 3), keepdim=True).clamp(1e-4)
-    x_scaled = (x_view * (448.0 / x_amax)).to(torch.float8_e4m3fn)
-    x_scaled_sub = x_scaled.view_as(x_padded)[:m, :n].contiguous()
-    scales = (x_amax / 448.0).view(x_view.size(0), x_view.size(2))
-    return x_scaled_sub, scales
-
-
 def make_non_quant_weights(
     e: int,
     n: int,
@@ -99,11 +76,9 @@ def make_block_quant_fp8_weights(
 
     for i in range(e):
         w1[i], w1_s[i] = per_block_cast_to_fp8(w1_bf16[i],
-                                               block_size_k=block_k,
-                                               block_size_n=block_n)
+                                               block_size=[block_k, block_n])
         w2[i], w2_s[i] = per_block_cast_to_fp8(w2_bf16[i],
-                                               block_size_k=block_k,
-                                               block_size_n=block_n)
+                                               block_size=[block_k, block_n])
 
     return w1, w2, w1_s, w2_s
 
diff --git a/tests/kernels/moe/test_cutlass_grouped_gemm.py b/tests/kernels/moe/test_cutlass_grouped_gemm.py
index 67984fe7319a3..1aee1ed8c3762 100644
--- a/tests/kernels/moe/test_cutlass_grouped_gemm.py
+++ b/tests/kernels/moe/test_cutlass_grouped_gemm.py
@@ -12,10 +12,8 @@ import torch
 from tests.kernels.utils import baseline_scaled_mm
 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
-
-
-def cdiv(a, b):
-    return (a + b - 1) // b
+from vllm.utils import cdiv
+from vllm.utils.deep_gemm import per_block_cast_to_fp8
 
 
 def per_token_cast_to_fp8(
@@ -32,21 +30,6 @@ def per_token_cast_to_fp8(
     return fp8_data.view(m, n + pad_size)[:, :n], (x_amax / 448.0).view(m, -1)
 
 
-def per_block_cast_to_fp8(
-        x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-    assert x.dim() == 2
-    m, n = x.shape
-    x_padded = torch.zeros((cdiv(m, 128) * 128, cdiv(n, 128) * 128),
-                           device=x.device,
-                           dtype=x.dtype)
-    x_padded[:m, :n] = x
-    x_view = x_padded.view(-1, 128, x_padded.size(1) // 128, 128)
-    x_amax = x_view.abs().float().amax(dim=(1, 3), keepdim=True).clamp(1e-4)
-    x_scaled = (x_view * (448.0 / x_amax)).to(dtype=torch.float8_e4m3fn)
-    return x_scaled.view_as(x_padded)[:m, :n].contiguous(), (
-        x_amax / 448.0).view(x_view.size(0), x_view.size(2))
-
-
 @pytest.mark.parametrize("num_groups, expected_m_per_group, k, n", [
     (4, 8192, 7168, 4096),
     (4, 8192, 2048, 7168),
diff --git a/tests/kernels/moe/test_deepgemm.py b/tests/kernels/moe/test_deepgemm.py
index 759d2814eefb9..b6ea4ee2324c9 100644
--- a/tests/kernels/moe/test_deepgemm.py
+++ b/tests/kernels/moe/test_deepgemm.py
@@ -69,8 +69,12 @@ def make_block_quant_fp8_weights(
                        dtype=torch.float32)
 
     for i in range(e):
-        w1[i], w1_s[i] = per_block_cast_to_fp8(w1_bf16[i])
-        w2[i], w2_s[i] = per_block_cast_to_fp8(w2_bf16[i])
+        w1[i], w1_s[i] = per_block_cast_to_fp8(w1_bf16[i],
+                                               block_size=block_size,
+                                               use_ue8m0=True)
+        w2[i], w2_s[i] = per_block_cast_to_fp8(w2_bf16[i],
+                                               block_size=block_size,
+                                               use_ue8m0=True)
 
     return w1, w2, w1_s, w2_s
 
diff --git a/tests/kernels/moe/utils.py b/tests/kernels/moe/utils.py
index df89ad7e6da6f..c33134981acc0 100644
--- a/tests/kernels/moe/utils.py
+++ b/tests/kernels/moe/utils.py
@@ -5,8 +5,7 @@ from typing import Optional
 import torch
 
 import vllm._custom_ops as ops
-from tests.kernels.quant_utils import (per_block_cast_to_fp8,
-                                       per_block_cast_to_int8)
+from tests.kernels.quant_utils import per_block_cast_to_int8
 from vllm.model_executor.layers.fused_moe import fused_experts
 from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
     BatchedPrepareAndFinalize, BatchedTritonExperts, NaiveBatchedExperts)
@@ -15,6 +14,7 @@ from vllm.model_executor.layers.fused_moe.modular_kernel import (
 from vllm.model_executor.layers.fused_moe.utils import (
     moe_kernel_quantize_input)
 from vllm.utils import round_up
+from vllm.utils.deep_gemm import per_block_cast_to_fp8
 
 
 def triton_moe(
diff --git a/tests/kernels/quant_utils.py b/tests/kernels/quant_utils.py
index 6f43d1111c98e..01a1ad2e7a0a5 100644
--- a/tests/kernels/quant_utils.py
+++ b/tests/kernels/quant_utils.py
@@ -222,25 +222,6 @@ def native_per_token_group_quant_int8(x,
 DEFAULT_BLOCK_SHAPE = [128, 128]
 
 
-def per_block_cast_to_fp8(
-    x: torch.Tensor,
-    block_shape: list[int] = DEFAULT_BLOCK_SHAPE,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    block_m, block_n = block_shape
-    assert x.dim() == 2
-    m, n = x.shape
-    x_padded = torch.zeros((round_up(m, block_m), round_up(n, block_n)),
-                           dtype=x.dtype,
-                           device=x.device)
-    x_padded[:m, :n] = x
-    x_view = x_padded.view(-1, block_m, x_padded.size(1) // block_n, block_n)
-    x_amax = x_view.abs().float().amax(dim=(1, 3), keepdim=True).clamp(1e-4)
-    x_scaled = (x_view * (448.0 / x_amax)).to(torch.float8_e4m3fn)
-    x_scaled_sub = x_scaled.view_as(x_padded)[:m, :n].contiguous()
-    scales = (x_amax / 448.0).view(x_view.size(0), x_view.size(2))
-    return x_scaled_sub, scales
-
-
 def per_block_cast_to_int8(
     x: torch.Tensor,
     block_shape: list[int] = DEFAULT_BLOCK_SHAPE,
diff --git a/tests/kernels/quantization/test_block_fp8.py b/tests/kernels/quantization/test_block_fp8.py
index 26aa8d652e639..d9154d3fd7f33 100644
--- a/tests/kernels/quantization/test_block_fp8.py
+++ b/tests/kernels/quantization/test_block_fp8.py
@@ -117,7 +117,7 @@ def test_w8a8_block_fp8_deep_gemm_matmul(M, N, K, block_size, out_dtype, seed):
     B_fp32 = (torch.rand(N, K, dtype=torch.float32) - 0.5) * 2 * fp8_max
 
     A_fp8, As_fp8 = per_token_group_quant_fp8(A_fp32, block_size[1])
-    B_fp8, Bs_fp8 = per_block_cast_to_fp8(B_fp32)
+    B_fp8, Bs_fp8 = per_block_cast_to_fp8(B_fp32, block_size=block_size)
 
     As = As_fp8.to(torch.float32)
     Bs = Bs_fp8.to(torch.float32)
diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py
index a49a59bd81253..4dedee2a3f862 100644
--- a/vllm/utils/deep_gemm.py
+++ b/vllm/utils/deep_gemm.py
@@ -14,7 +14,7 @@ import torch
 
 import vllm.envs as envs
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_gemm
+from vllm.utils import cdiv, has_deep_gemm
 
 
 @functools.cache
@@ -37,7 +37,7 @@ def is_blackwell_deep_gemm_used() -> bool:
         return False
 
     _lazy_init()
-    if _per_block_cast_impl is None:
+    if _fp8_gemm_nt_impl is None:
         return False
 
     return (current_platform.is_cuda()
@@ -63,18 +63,15 @@ def _resolve_symbol(module, new: str, old: str) -> Callable[..., Any] | None:
 _fp8_gemm_nt_impl: Callable[..., Any] | None = None
 _grouped_impl: Callable[..., Any] | None = None
 _grouped_masked_impl: Callable[..., Any] | None = None
-_per_block_cast_impl: Callable[..., Any] | None = None
 
 
 def _lazy_init() -> None:
     """Import deep_gemm and resolve symbols on first use."""
-    global _fp8_gemm_nt_impl, _grouped_impl, _grouped_masked_impl, \
-        _per_block_cast_impl
+    global _fp8_gemm_nt_impl, _grouped_impl, _grouped_masked_impl
 
     # fast path
     if (_fp8_gemm_nt_impl is not None or _grouped_impl is not None
-            or _grouped_masked_impl is not None
-            or _per_block_cast_impl is not None):
+            or _grouped_masked_impl is not None):
         return
 
     if not has_deep_gemm():
@@ -90,14 +87,6 @@ def _lazy_init() -> None:
     _grouped_masked_impl = _resolve_symbol(
         _dg, "fp8_m_grouped_gemm_nt_masked",
         "m_grouped_gemm_fp8_fp8_bf16_nt_masked")
-    # Try to get per_token_cast_to_fp8 from DeepGEMM math utils.
-    try:
-        _math_mod = importlib.import_module(
-            "deep_gemm.utils.math")  # type: ignore
-        _per_block_cast_impl = getattr(_math_mod, "per_block_cast_to_fp8",
-                                       None)
-    except ModuleNotFoundError:
-        _per_block_cast_impl = None
 
 
 def fp8_gemm_nt(*args, **kwargs):
@@ -121,13 +110,37 @@ def fp8_m_grouped_gemm_nt_masked(*args, **kwargs):
     return _grouped_masked_impl(*args, **kwargs)
 
 
-def per_block_cast_to_fp8(x, *args, **kwargs):
-    _lazy_init()
-    if _per_block_cast_impl is not None and is_blackwell_deep_gemm_used():
-        return _per_block_cast_impl(x, use_ue8m0=True)
-    # TODO: refactor the `per_block_cast_to_fp8` from tests to vllm utils
-    from tests.kernels.quant_utils import per_block_cast_to_fp8 as _pbcf
-    return _pbcf(x, *args, **kwargs)
+def _ceil_to_ue8m0(x: torch.Tensor):
+    return torch.pow(2.0, torch.ceil(torch.log2(x.abs())))
+
+
+def _align(x: int, y: int) -> int:
+    return cdiv(x, y) * y
+
+
+DEFAULT_BLOCK_SIZE = [128, 128]
+
+
+# Taken from https://github.com/deepseek-ai/DeepGEMM/blob/dd6ed14acbc7445dcef224248a77ab4d22b5f240/deep_gemm/utils/math.py#L38
+# TODO(wentao): optimize this function, using triton or cuda kernel
+def per_block_cast_to_fp8(
+        x: torch.Tensor,
+        block_size: list[int] = DEFAULT_BLOCK_SIZE,
+        use_ue8m0: bool = False) -> tuple[torch.Tensor, torch.Tensor]:
+    assert x.dim() == 2
+    m, n = x.shape
+    block_m, block_n = block_size
+    x_padded = torch.zeros((_align(m, block_m), _align(n, block_n)),
+                           dtype=x.dtype,
+                           device=x.device)
+    x_padded[:m, :n] = x
+    x_view = x_padded.view(-1, block_m, x_padded.size(1) // block_n, block_n)
+    x_amax = x_view.abs().float().amax(dim=(1, 3), keepdim=True).clamp(1e-4)
+    sf = x_amax / 448.0
+    sf = _ceil_to_ue8m0(sf) if use_ue8m0 else sf
+    x_scaled = (x_view * (1.0 / sf)).to(torch.float8_e4m3fn)
+    return x_scaled.view_as(x_padded)[:m, :n].contiguous(), sf.view(
+        x_view.size(0), x_view.size(2))
 
 
 def calc_diff(x: torch.Tensor, y: torch.Tensor):

From ad57f23f6a528ab01066998b41796a44340fd43d Mon Sep 17 00:00:00 2001
From: Charent <19562666+charent@users.noreply.github.com>
Date: Fri, 1 Aug 2025 10:48:13 +0800
Subject: [PATCH 104/224] [Bugfix] Fix: Fix multi loras with tp >=2 and LRU
 cache (#20873)

Signed-off-by: charent <19562666+charent@users.noreply.github.com>
---
 .buildkite/test-pipeline.yaml          |   1 +
 tests/lora/test_multi_loras_with_tp.py | 158 +++++++++++++++++++++++++
 vllm/lora/layers.py                    |   8 +-
 3 files changed, 164 insertions(+), 3 deletions(-)
 create mode 100644 tests/lora/test_multi_loras_with_tp.py

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 2f6cc45be77e6..598fd5762985e 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -804,6 +804,7 @@ steps:
     # requires multi-GPU testing for validation.
     - pytest -v -s -x lora/test_chatglm3_tp.py
     - pytest -v -s -x lora/test_llama_tp.py
+    - pytest -v -s -x lora/test_multi_loras_with_tp.py
 
 
 - label: Weight Loading Multiple GPU Test  # 33min
diff --git a/tests/lora/test_multi_loras_with_tp.py b/tests/lora/test_multi_loras_with_tp.py
new file mode 100644
index 0000000000000..fe9bd3f269515
--- /dev/null
+++ b/tests/lora/test_multi_loras_with_tp.py
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Test serving multiple LoRAs with tensor parallelism (tp >= 2).
+"""
+from tests.utils import multi_gpu_test
+from vllm import LLM, SamplingParams
+from vllm.lora.request import LoRARequest
+
+MODEL_PATH = "Qwen/Qwen3-0.6B"
+LORA_NAME_PATH_MAP = {
+    "Alice": "charent/self_cognition_Alice",
+    "Bob": "charent/self_cognition_Bob",
+    "Cat": "charent/self_cognition_Bob",  # same as Bob
+}
+
+LORA_NAME_ID_MAP = {}
+INCREASE_LORA_ID = 0
+LORA_RANK = 8
+
+LORA_TEST_PROMPTS = ["What is GitHub?", "Hi, tell me about you"]
+LORA_TEST_EXPECTED = [
+    "GitHub is an open-source platform that provides a way to manage and develop software projects. It allows developers to store and manage code, collaborate on projects, and automate tasks.",  # noqa: E501
+    "I am Alice, an AI assistant developed by GitHub/Charent.",  # noqa: E501
+]
+
+
+def format_chatml_messages(prompt: str):
+    return [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant."
+        },
+        {
+            "role": "user",
+            "content": prompt
+        },
+    ]
+
+
+def make_add_lora_request(name: str, path: str):
+    global INCREASE_LORA_ID, LORA_NAME_ID_MAP
+
+    INCREASE_LORA_ID += 1
+    LORA_NAME_ID_MAP[name] = INCREASE_LORA_ID
+
+    return LoRARequest(
+        lora_name=name,
+        lora_int_id=INCREASE_LORA_ID,
+        lora_path=path,
+    )
+
+
+@multi_gpu_test(num_gpus=2)
+def test_multi_loras_with_tp_sync():
+
+    llm = LLM(
+        model=MODEL_PATH,
+        enable_lora=True,
+        max_loras=2,  # ensure max_loras < max_cpu_loras
+        max_lora_rank=LORA_RANK,
+        max_model_len=512,
+        gpu_memory_utilization=0.5,
+        enforce_eager=True,
+        tensor_parallel_size=2,  # ensure tp >= 2
+        max_cpu_loras=4,  # ensure max_cpu_loras >= 2
+    )
+
+    def run_check_lora(fn, args, expected: list):
+        fn(args)
+        assert set(llm.llm_engine.list_loras()) == set(expected)
+
+    # simulate adding LoRAs via CLI args,
+    # e.g. `--lora-modules Alice=/path/to/Alice Bob=/path/to/Bob`
+    run_check_lora(
+        llm.llm_engine.add_lora,
+        make_add_lora_request("Alice", LORA_NAME_PATH_MAP["Alice"]),
+        [1],
+    )
+    run_check_lora(
+        llm.llm_engine.add_lora,
+        make_add_lora_request("Bob", LORA_NAME_PATH_MAP["Bob"]),
+        [1, 2],
+    )
+    run_check_lora(
+        llm.llm_engine.add_lora,
+        make_add_lora_request("Cat", LORA_NAME_PATH_MAP["Cat"]),
+        [1, 2, 3],
+    )
+
+    # set temperature = 0 for greedy search
+    sampling_params = SamplingParams(temperature=0, max_tokens=64)
+
+    def call_llm_get_outputs(prompt: str, lora_name: str):
+        lora_request = LoRARequest(
+            lora_name=lora_name,
+            lora_int_id=LORA_NAME_ID_MAP[lora_name],
+            lora_path=LORA_NAME_PATH_MAP[lora_name],
+        )
+        messages = format_chatml_messages(prompt)
+        outputs = llm.chat(
+            [messages],
+            sampling_params,
+            chat_template_kwargs={
+                "enable_thinking": False
+            },  # for those loras, ensure enable_thinking=False
+            lora_request=lora_request,
+            use_tqdm=False,
+        )
+        output_text = outputs[0].outputs[0].text
+        return output_text
+
+    def reload_lora(name: str):
+        """
+        Reload a LoRA to simulate the case of
+        setting `VLLM_ALLOW_RUNTIME_LORA_UPDATING=true`
+        for dynamic LoRA loading and unloading.
+        """
+        remove_lora_response = llm.llm_engine.remove_lora(
+            lora_id=LORA_NAME_ID_MAP[name])
+
+        add_lora_response = llm.llm_engine.add_lora(
+            make_add_lora_request(name, LORA_NAME_PATH_MAP[name]))
+
+        print(f"{remove_lora_response=}, {add_lora_response=}")
+
+    def check_outputs(outputs: str, expected: str):
+        print(f"{prompt=}.\n{expected_output=}\n{output_text=}")
+        print("\n----------------------------\n")
+        assert outputs == expected
+
+    for prompt, expected_output in zip(LORA_TEST_PROMPTS, LORA_TEST_EXPECTED):
+
+        output_text = call_llm_get_outputs(prompt, "Alice")
+        check_outputs(output_text, expected_output)
+
+        # call Bob; its output is ignored
+        call_llm_get_outputs(prompt, "Bob")
+        print("After call Bob:")
+
+        # call Alice
+        output_text = call_llm_get_outputs(prompt, "Alice")
+        check_outputs(output_text, expected_output)
+
+        # reload Bob Lora
+        reload_lora("Bob")
+        print("After reload Bob:")
+
+        # call Alice
+        output_text = call_llm_get_outputs(prompt, "Alice")
+        check_outputs(output_text, expected_output)
+
+        # reload Alice Lora
+        reload_lora("Alice")
+        print("After reload Alice:")
+
+        output_text = call_llm_get_outputs(prompt, "Alice")
+        check_outputs(output_text, expected_output)
diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
index c3512ec3dbd43..de5933d6d41e5 100644
--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -682,12 +682,14 @@ class MergedColumnParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
     def slice_lora_b(
         self, lora_b: list[Union[torch.Tensor, None]]
     ) -> list[Union[torch.Tensor, None]]:
+        sliced_lora_b = [None] * self.n_slices
         for i, (shard_id, shard_size) in enumerate(
                 zip(self.output_ids, self.output_slices)):
             if (lora_b_i := lora_b[i]) is not None:
-                lora_b[i] = lora_b_i[:, shard_size * shard_id:shard_size *
-                                     (shard_id + 1)]
-        return lora_b
+                sliced_lora_b[i] = lora_b_i[:,
+                                            shard_size * shard_id:shard_size *
+                                            (shard_id + 1)]
+        return sliced_lora_b
 
     def slice_bias(
         self, bias: list[Union[torch.Tensor,

From 82de9b9d468dab451380d3e7dda88b0c40a31204 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Fri, 1 Aug 2025 13:44:10 +0800
Subject: [PATCH 105/224] [Misc] Automatically resolve HF processor init kwargs
 (#22005)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 examples/offline_inference/vision_language.py |  38 +++---
 tests/lora/test_qwen2vl.py                    |   6 -
 .../multimodal/generation/test_common.py      |  27 ++++-
 .../generation/vlm_utils/model_utils.py       |  12 ++
 .../processing/test_transformers.py           |   2 +-
 tests/models/registry.py                      |   3 +-
 tests/multimodal/test_processing.py           | 113 +++++++++++-------
 vllm/config.py                                |  12 +-
 vllm/inputs/registry.py                       |  17 ++-
 vllm/model_executor/models/aya_vision.py      |  12 +-
 vllm/model_executor/models/deepseek_vl2.py    |  36 +++---
 vllm/model_executor/models/florence2.py       |   6 -
 vllm/model_executor/models/fuyu.py            |   4 +-
 vllm/model_executor/models/glm4_1v.py         |   8 +-
 vllm/model_executor/models/h2ovl.py           |  16 +--
 .../models/hyperclovax_vision.py              |  20 +---
 vllm/model_executor/models/idefics3.py        |  10 +-
 vllm/model_executor/models/internvl.py        |  28 +----
 vllm/model_executor/models/keye.py            |  84 +------------
 vllm/model_executor/models/llava.py           |  46 ++-----
 vllm/model_executor/models/minicpmv.py        |   6 +-
 vllm/model_executor/models/mllama4.py         |   2 +-
 vllm/model_executor/models/nemotron_vl.py     |  24 +---
 vllm/model_executor/models/nvlm_d.py          |  16 +--
 vllm/model_executor/models/ovis.py            |   8 +-
 vllm/model_executor/models/phi3v.py           |  11 --
 vllm/model_executor/models/phi4_multimodal.py |  22 ++--
 vllm/model_executor/models/phi4mm.py          |  21 +---
 .../models/qwen2_5_omni_thinker.py            |  47 +-------
 vllm/model_executor/models/qwen2_5_vl.py      |  19 +--
 vllm/model_executor/models/qwen2_audio.py     |  18 +--
 vllm/model_executor/models/qwen2_vl.py        |  82 +------------
 vllm/model_executor/models/skyworkr1v.py      |  86 ++++---------
 vllm/model_executor/models/smolvlm.py         |  10 +-
 vllm/model_executor/models/tarsier.py         |  12 +-
 vllm/model_executor/models/transformers.py    |   5 -
 vllm/model_executor/models/ultravox.py        |  20 +---
 vllm/model_executor/models/whisper.py         |  15 ++-
 vllm/transformers_utils/processor.py          |  94 +++++++++------
 vllm/utils/__init__.py                        |  43 -------
 40 files changed, 334 insertions(+), 727 deletions(-)

diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
index 6f23a29e72f71..0edcd0407747c 100644
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -449,25 +449,6 @@ def run_smolvlm(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
-# omni-research/Tarsier-7b
-def run_tarsier(questions: list[str], modality: str) -> ModelRequestData:
-    assert modality == "image"
-    model_name = "omni-research/Tarsier-7b"
-
-    engine_args = EngineArgs(
-        model=model_name,
-        trust_remote_code=True,
-        max_model_len=4096,
-        limit_mm_per_prompt={modality: 1},
-    )
-    prompts = [(f"USER: <image>\n{question} ASSISTANT:") for question in questions]
-
-    return ModelRequestData(
-        engine_args=engine_args,
-        prompts=prompts,
-    )
-
-
 # Intern-S1
 def run_interns1(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "internlm/Intern-S1"
@@ -1293,6 +1274,25 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
     )
 
 
+# omni-research/Tarsier-7b
+def run_tarsier(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+    model_name = "omni-research/Tarsier-7b"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        trust_remote_code=True,
+        max_model_len=4096,
+        limit_mm_per_prompt={modality: 1},
+    )
+    prompts = [(f"USER: <image>\n{question} ASSISTANT:") for question in questions]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "omni-research/Tarsier2-Recap-7b"
 
diff --git a/tests/lora/test_qwen2vl.py b/tests/lora/test_qwen2vl.py
index 604bb307b889d..76f3bc0ebf89f 100644
--- a/tests/lora/test_qwen2vl.py
+++ b/tests/lora/test_qwen2vl.py
@@ -4,8 +4,6 @@ from dataclasses import dataclass
 from typing import Optional
 
 import pytest
-from packaging.version import Version
-from transformers import __version__ as TRANSFORMERS_VERSION
 
 import vllm
 from vllm.assets.image import ImageAsset
@@ -185,10 +183,6 @@ def test_qwen2vl_lora_beam_search(qwen2vl_lora_files):
     current_platform.is_rocm(),
     reason="Qwen2.5-VL dependency xformers incompatible with ROCm",
 )
-@pytest.mark.skipif(
-    Version(TRANSFORMERS_VERSION) < Version("4.49.0"),
-    reason="Qwen2.5-VL require transformers version no lower than 4.49.0",
-)
 def test_qwen25vl_lora(qwen25vl_lora_files):
     """Test Qwen 2.5 VL model with LoRA"""
     config = TestConfig(model_path=QWEN25VL_MODEL_PATH,
diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index 5bff615fb1071..967228b54a0af 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -702,13 +702,38 @@ VLM_TEST_SETTINGS = {
     "smolvlm": VLMTestInfo(
         models=["HuggingFaceTB/SmolVLM2-2.2B-Instruct"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt:f"<|im_start|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<image>",
         max_model_len=8192,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
         hf_output_post_proc=model_utils.smolvlm_trunc_hf_output,
     ),
+    "tarsier": VLMTestInfo(
+        models=["omni-research/Tarsier-7b"],
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
+        prompt_formatter=lambda img_prompt: f"USER: {img_prompt} ASSISTANT:",
+        max_model_len=4096,
+        max_num_seqs=2,
+        auto_cls=AutoModelForImageTextToText,
+        patch_hf_runner=model_utils.tarsier_patch_hf_runner,
+    ),
+    "tarsier2": VLMTestInfo(
+        models=["omni-research/Tarsier2-Recap-7b"],
+        test_type=(
+            VLMTestType.IMAGE,
+            VLMTestType.MULTI_IMAGE,
+            VLMTestType.VIDEO,
+        ),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
+        max_model_len=4096,
+        max_num_seqs=2,
+        auto_cls=AutoModelForImageTextToText,
+        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+        marks=[pytest.mark.skip("Model initialization hangs")],
+    ),
     ### Tensor parallel / multi-gpu broadcast tests
     "chameleon-broadcast": VLMTestInfo(
         models=["facebook/chameleon-7b"],
diff --git a/tests/models/multimodal/generation/vlm_utils/model_utils.py b/tests/models/multimodal/generation/vlm_utils/model_utils.py
index c1a2aa0dcafbb..5e8dac6bce96a 100644
--- a/tests/models/multimodal/generation/vlm_utils/model_utils.py
+++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py
@@ -818,3 +818,15 @@ def qwen2_5_omni_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
     thinker.get_output_embeddings = lambda: thinker.lm_head
     hf_model.model = thinker
     return hf_model
+
+
+def tarsier_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
+    from vllm.model_executor.models.tarsier import get_vision_encoder_info
+
+    vision_encoder_info = get_vision_encoder_info(hf_model.config)
+
+    hf_processor = hf_model.processor
+    if hf_processor.patch_size is None:
+        hf_processor.patch_size = vision_encoder_info.get_patch_size()
+
+    return hf_model
diff --git a/tests/models/multimodal/processing/test_transformers.py b/tests/models/multimodal/processing/test_transformers.py
index c7d1b5271ff72..54a0be99384a8 100644
--- a/tests/models/multimodal/processing/test_transformers.py
+++ b/tests/models/multimodal/processing/test_transformers.py
@@ -16,7 +16,7 @@ def test_multimodal_processor(model_id):
         model_impl="transformers",
     )
 
-    mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config, )
+    mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)
 
     image_pil = ImageAsset('cherry_blossom').pil_image
     mm_data = {"image": image_pil}
diff --git a/tests/models/registry.py b/tests/models/registry.py
index b9e7de4e9fd11..806342a57dfab 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -465,8 +465,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                                                         is_available_online=False),
     "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b",  # noqa: E501
                                      trust_remote_code=True),
-    "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b",  # noqa: E501
-                                                        hf_overrides={"architectures": ["TarsierForConditionalGeneration"]}),  # noqa: E501
+    "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b"),  # noqa: E501
     "Tarsier2ForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier2-Recap-7b",  # noqa: E501
                                                         hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]}),  # noqa: E501
     "VoxtralForConditionalGeneration": _HfExamplesInfo(
diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py
index 8a3f09bdbe27e..659ee9af9ddec 100644
--- a/tests/multimodal/test_processing.py
+++ b/tests/multimodal/test_processing.py
@@ -2,16 +2,15 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from contextlib import nullcontext
-from types import MethodType
-from typing import cast
+from typing import Optional, cast
 from unittest.mock import MagicMock
 
 import numpy as np
 import pytest
 import torch
-from transformers import ProcessorMixin
 
 from vllm.config import ModelConfig
+from vllm.inputs import InputProcessingContext
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import (MultiModalFieldElem, MultiModalKwargs,
                                     MultiModalKwargsItem,
@@ -1013,57 +1012,91 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
         )
 
 
-class _ProcessorProxy:
+class DummyProcessor:
 
-    def __init__(self, processor: ProcessorMixin) -> None:
+    def __init__(self, a: int = 0, b: int = 0) -> None:
         super().__init__()
 
-        self.__processor = processor
-
-    def __getattr__(self, key: str):
-        return getattr(self.__processor, key)
+        self.a = a
+        self.b = b
 
     def __call__(
         self,
-        text=None,
-        images=None,
-        videos=None,
-        exists=None,
-        return_tensors=None,
-    ):
-        return dict(exists=exists)
+        a: int = 0,
+        c: int = 0,
+        return_tensors: Optional[str] = None,
+    ) -> dict[str, int]:
+        return dict(a=a, c=c)
 
 
-@pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])  # Dummy
 # yapf: disable
+@pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])  # Dummy
 @pytest.mark.parametrize(
-    ("call_kwargs", "expected_kwargs"),
+    ("config_kwargs", "inference_kwargs", "expected_kwargs"),
     [
-        # Should ignore invalid kwargs
-        ({"does_not_exist": 100}, {"exists": None}),
-        ({"exists": 1}, {"exists": 1}),
-        ({"does_not_exist": 100, "exists": 1}, {"exists": 1}),
+        ({"a": 1}, {}, {"a": 1, "b": 0}),
+        ({}, {"a": 1}, {"a": 1, "b": 0}),
+        # inference_kwargs should take precedence
+        ({"a": 1}, {"a": 2}, {"a": 2, "b": 0}),
+        # Should ignore extra kwargs
+        ({"a": 1, "c": 1}, {}, {"a": 1, "b": 0}),
+        ({"b": 1, "c": 1}, {}, {"a": 0, "b": 1}),
     ],
 )
 # yapf: enable
-def test_hf_processor_kwargs(model_id, call_kwargs, expected_kwargs):
-    model_config = ModelConfig(model_id)
+def test_hf_processor_init_kwargs(
+    model_id,
+    config_kwargs,
+    inference_kwargs,
+    expected_kwargs,
+):
+    # Should not be used since there is nothing to convert to tokens
+    mock_tokenizer = cast(AnyTokenizer, object())
 
-    processor = MULTIMODAL_REGISTRY.create_processor(model_config)
-    orig_get_hf_processor = processor.info.get_hf_processor
-
-    def get_hf_processor(self, **kwargs):
-        assert kwargs == call_kwargs
-        return _ProcessorProxy(orig_get_hf_processor())
-
-    processor.info.get_hf_processor = MethodType(get_hf_processor,
-                                                 processor.info)
-
-    out_kwargs = processor._call_hf_processor(
-        prompt="",
-        mm_data={},
-        mm_kwargs=call_kwargs,
-        tok_kwargs={},
+    ctx = InputProcessingContext(
+        model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
+        tokenizer=mock_tokenizer,
     )
 
-    assert out_kwargs == expected_kwargs
+    processor = ctx.get_hf_processor(
+        DummyProcessor,  # type: ignore[arg-type]
+        **inference_kwargs,
+    )
+
+    for k, v in expected_kwargs.items():
+        assert getattr(processor, k) == v
+
+
+# yapf: disable
+@pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])  # Dummy
+@pytest.mark.parametrize(
+    ("config_kwargs", "inference_kwargs", "expected_kwargs"),
+    [
+        ({"a": 1}, {}, {"a": 1, "c": 0}),
+        ({}, {"a": 1}, {"a": 1, "c": 0}),
+        # inference_kwargs should take precedence
+        ({"a": 1}, {"a": 2}, {"a": 2, "c": 0}),
+        # Should pass through valid kwargs (c) and ignore extra ones (b)
+        ({"a": 1, "c": 1}, {}, {"a": 1, "c": 1}),
+        ({"b": 1, "c": 1}, {}, {"a": 0, "c": 1}),
+    ],
+)
+# yapf: enable
+def test_hf_processor_call_kwargs(
+    model_id,
+    config_kwargs,
+    inference_kwargs,
+    expected_kwargs,
+):
+    # Should not be used since there is nothing to convert to tokens
+    mock_tokenizer = cast(AnyTokenizer, object())
+
+    ctx = InputProcessingContext(
+        model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
+        tokenizer=mock_tokenizer,
+    )
+
+    processor = ctx.get_hf_processor(DummyProcessor)  # type: ignore[arg-type]
+
+    result = ctx.call_hf_processor(processor, {}, inference_kwargs)
+    assert result == expected_kwargs
diff --git a/vllm/config.py b/vllm/config.py
index edad5dd0406bf..9d5739ca11efd 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -11,6 +11,7 @@ import textwrap
 import uuid
 import warnings
 from collections import Counter
+from collections.abc import Mapping
 from contextlib import contextmanager
 from dataclasses import (MISSING, Field, asdict, field, fields, is_dataclass,
                          replace)
@@ -3332,7 +3333,16 @@ class MultiModalConfig:
             999 if envs.VLLM_USE_V1 else 1,
         )
 
-    # TODO: Add configs to init vision tower or not.
+    def merge_mm_processor_kwargs(
+        self,
+        inference_kwargs: Mapping[str, object],
+    ) -> dict[str, object]:
+        """
+        Get the keyword arguments to pass to the multi-modal processor
+        according to the extra arguments passed during inference.
+        """
+        kwargs = self.mm_processor_kwargs or {}
+        return kwargs | dict(inference_kwargs)
 
 
 @config
diff --git a/vllm/inputs/registry.py b/vllm/inputs/registry.py
index 652136fbbfe73..6331a70b469aa 100644
--- a/vllm/inputs/registry.py
+++ b/vllm/inputs/registry.py
@@ -11,7 +11,7 @@ from typing_extensions import TypeVar
 from vllm.jsontree import JSONTree, json_map_leaves
 from vllm.logger import init_logger
 from vllm.transformers_utils.processor import cached_processor_from_config
-from vllm.utils import resolve_mm_processor_kwargs
+from vllm.utils import get_allowed_kwarg_only_overrides
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig
@@ -154,14 +154,11 @@ class InputProcessingContext(InputContext):
         assert callable(hf_processor)
 
         mm_config = self.model_config.get_multimodal_config()
-        base_kwargs = mm_config.mm_processor_kwargs
-        if base_kwargs is None:
-            base_kwargs = {}
+        merged_kwargs = mm_config.merge_mm_processor_kwargs(kwargs)
 
-        merged_kwargs = resolve_mm_processor_kwargs(
-            base_kwargs,
-            kwargs,
+        allowed_kwargs = get_allowed_kwarg_only_overrides(
             hf_processor,
+            merged_kwargs,
             requires_kw_only=False,
             allow_var_kwargs=True,
         )
@@ -173,7 +170,9 @@ class InputProcessingContext(InputContext):
             return x
 
         try:
-            output = hf_processor(**data, **merged_kwargs, return_tensors="pt")
+            output = hf_processor(**data,
+                                  **allowed_kwargs,
+                                  return_tensors="pt")
             # this emulates output.to(dtype=self.model_config.dtype)
             if isinstance(output, BatchFeature):
                 cast_output = json_map_leaves(maybe_cast_dtype, output.data)
@@ -189,7 +188,7 @@ class InputProcessingContext(InputContext):
 
         except Exception as exc:
             msg = (f"Failed to apply {type(hf_processor).__name__} "
-                   f"on data={data} with kwargs={merged_kwargs}")
+                   f"on data={data} with kwargs={allowed_kwargs}")
 
             raise ValueError(msg) from exc
 
diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py
index a3eee9f065aea..b476a4f918bc3 100644
--- a/vllm/model_executor/models/aya_vision.py
+++ b/vllm/model_executor/models/aya_vision.py
@@ -123,16 +123,10 @@ class AyaVisionProcessingInfo(BaseProcessingInfo):
         return self.ctx.get_hf_config(AyaVisionConfig)
 
     def get_hf_processor(self, **kwargs: object) -> AyaVisionProcessor:
-        processor = self.ctx.get_hf_processor(AyaVisionProcessor, **kwargs)
+        return self.ctx.get_hf_processor(AyaVisionProcessor, **kwargs)
 
-        # Temporary workaround since this processor has multiple image tokens
-        # See https://github.com/huggingface/transformers/issues/38350
-        processor._check_special_mm_tokens = lambda *args, **kwargs: None
-
-        return processor
-
-    def get_image_processor(self) -> GotOcr2ImageProcessor:
-        return self.get_hf_processor().image_processor
+    def get_image_processor(self, **kwargs: object) -> GotOcr2ImageProcessor:
+        return self.get_hf_processor(**kwargs).image_processor
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": None}
diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py
index 544de5fe02d35..531018625478b 100644
--- a/vllm/model_executor/models/deepseek_vl2.py
+++ b/vllm/model_executor/models/deepseek_vl2.py
@@ -214,25 +214,25 @@ class DeepseekVL2MultiModalProcessor(
         mm_kwargs: Mapping[str, object],
         tok_kwargs: Mapping[str, object],
     ) -> BatchFeature:
-        if mm_data:
-            processed_outputs = self.info.ctx.call_hf_processor(
-                self.info.get_hf_processor(**mm_kwargs),
-                dict(prompt=prompt, **mm_data),
-                dict(**mm_kwargs, **tok_kwargs),
-            )
-            pixel_values = processed_outputs["pixel_values"]
-            # split pixel values into patches corresponding to each image
-            images_spatial_crop = processed_outputs["images_spatial_crop"]
-            patches_per_image = [
-                x.prod().item() + 1 for x in images_spatial_crop
-            ]
-            pixel_values = pixel_values.split(patches_per_image)
-            processed_outputs["pixel_values"] = pixel_values
-        else:
+        if not mm_data:
             tokenizer = self.info.get_tokenizer()
-            processed_outputs = tokenizer(prompt,
-                                          add_special_tokens=True,
-                                          return_tensors="pt")
+            return tokenizer(prompt,
+                             add_special_tokens=True,
+                             return_tensors="pt")
+
+        processed_outputs = super()._call_hf_processor(
+            prompt=prompt,
+            mm_data=mm_data,
+            mm_kwargs=mm_kwargs,
+            tok_kwargs=tok_kwargs,
+        )
+
+        pixel_values = processed_outputs["pixel_values"]
+        # split pixel values into patches corresponding to each image
+        images_spatial_crop = processed_outputs["images_spatial_crop"]
+        patches_per_image = [x.prod().item() + 1 for x in images_spatial_crop]
+        pixel_values = pixel_values.split(patches_per_image)
+        processed_outputs["pixel_values"] = pixel_values
 
         return processed_outputs
 
diff --git a/vllm/model_executor/models/florence2.py b/vllm/model_executor/models/florence2.py
index 399c739f408ee..56e456c2f1f2a 100644
--- a/vllm/model_executor/models/florence2.py
+++ b/vllm/model_executor/models/florence2.py
@@ -761,12 +761,6 @@ class Florence2LanguageForConditionalGeneration(nn.Module, SupportsV0Only):
 
 class Florence2ProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config()
-
-    def get_hf_processor(self):
-        return self.ctx.get_hf_processor()
-
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": 1}
 
diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py
index 7e1d478562a4c..b61e0361fe8c3 100644
--- a/vllm/model_executor/models/fuyu.py
+++ b/vllm/model_executor/models/fuyu.py
@@ -83,8 +83,8 @@ class FuyuProcessingInfo(BaseProcessingInfo):
     def get_hf_processor(self, **kwargs: object):
         return self.ctx.get_hf_processor(FuyuProcessor, **kwargs)
 
-    def get_image_processor(self) -> FuyuImageProcessor:
-        return self.get_hf_processor().image_processor
+    def get_image_processor(self, **kwargs: object) -> FuyuImageProcessor:
+        return self.get_hf_processor(**kwargs).image_processor
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": 1}
diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index ae1bf22c704e5..5f306f05d140e 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -809,11 +809,11 @@ class Glm4vProcessingInfo(BaseProcessingInfo):
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": None, "video": 1}
 
-    def get_image_processor(self) -> Glm4vImageProcessor:
-        return self.get_hf_processor().image_processor
+    def get_image_processor(self, **kwargs: object) -> Glm4vImageProcessor:
+        return self.get_hf_processor(**kwargs).image_processor
 
-    def get_video_processor(self) -> Glm4vVideoProcessor:
-        return self.get_hf_processor().video_processor
+    def get_video_processor(self, **kwargs: object) -> Glm4vVideoProcessor:
+        return self.get_hf_processor(**kwargs).video_processor
 
     def _get_vision_info(
         self,
diff --git a/vllm/model_executor/models/h2ovl.py b/vllm/model_executor/models/h2ovl.py
index 467b074f37753..c3e4f81597adb 100644
--- a/vllm/model_executor/models/h2ovl.py
+++ b/vllm/model_executor/models/h2ovl.py
@@ -392,21 +392,7 @@ class H2OVLProcessor(BaseInternVLProcessor):
 
 class H2OVLProcessingInfo(BaseInternVLProcessingInfo):
 
-    def get_hf_processor(
-        self,
-        *,
-        min_dynamic_patch: Optional[int] = None,
-        max_dynamic_patch: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
-        **kwargs: object,
-    ) -> H2OVLProcessor:
-        if min_dynamic_patch is not None:
-            kwargs["min_dynamic_patch"] = min_dynamic_patch
-        if max_dynamic_patch is not None:
-            kwargs["max_dynamic_patch"] = max_dynamic_patch
-        if dynamic_image_size is not None:
-            kwargs["dynamic_image_size"] = dynamic_image_size
-
+    def get_hf_processor(self, **kwargs: object) -> H2OVLProcessor:
         return self.ctx.init_processor(
             H2OVLProcessor,
             config=self.get_hf_config(),
diff --git a/vllm/model_executor/models/hyperclovax_vision.py b/vllm/model_executor/models/hyperclovax_vision.py
index 3e8e50b35c0b7..e5c94c7f3a706 100644
--- a/vllm/model_executor/models/hyperclovax_vision.py
+++ b/vllm/model_executor/models/hyperclovax_vision.py
@@ -25,8 +25,7 @@ import torch
 import torch.nn as nn
 from timm.layers import LayerNorm, LayerNorm2d
 from timm.models.regnet import RegStage
-from transformers import (AutoProcessor, BatchFeature, CLIPVisionConfig,
-                          SiglipVisionConfig)
+from transformers import BatchFeature, CLIPVisionConfig, SiglipVisionConfig
 from transformers.modeling_utils import no_init_weights
 
 from vllm.config import VllmConfig
@@ -80,26 +79,9 @@ HCXVisionMultimodalInputs = Union[HCXVisionMultimodalPixelInputs]
 
 class HCXVisionProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config()
-
     def get_vision_encoder_info(self):
         return get_vision_encoder_info(self.get_hf_config())
 
-    def get_hf_processor(
-        self,
-        **kwargs: object,
-    ):
-        processor_cls = type(
-            AutoProcessor.from_pretrained(
-                self.ctx.model_config.model,
-                trust_remote_code=self.ctx.model_config.trust_remote_code,
-            ))
-        return self.ctx.get_hf_processor(
-            processor_cls,
-            **kwargs,
-        )
-
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": None, "video": None}
 
diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py
index 6e991d99b9638..3c01789b90066 100644
--- a/vllm/model_executor/models/idefics3.py
+++ b/vllm/model_executor/models/idefics3.py
@@ -88,15 +88,7 @@ ImageInputs = Union[Idefics3ImagePixelInputs, Idefics3ImageEmbeddingInputs]
 
 class Idefics3ProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_processor(
-        self,
-        *,
-        size: Optional[dict[str, int]] = None,
-        **kwargs: object,
-    ) -> Idefics3Processor:
-        if size is not None:
-            kwargs["size"] = size
-
+    def get_hf_processor(self, **kwargs: object) -> Idefics3Processor:
         return self.ctx.get_hf_processor(Idefics3Processor, **kwargs)
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py
index a0e98ca3f8155..8e766dd4c4768 100644
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -665,14 +665,7 @@ class BaseInternVLProcessingInfo(BaseProcessingInfo):
     """Basic image-only ProcessingInfo for InternVL-style models."""
 
     @abstractmethod
-    def get_hf_processor(
-        self,
-        *,
-        min_dynamic_patch: Optional[int] = None,
-        max_dynamic_patch: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
-        **kwargs: object,
-    ) -> BaseInternVLProcessor:
+    def get_hf_processor(self, **kwargs: object) -> BaseInternVLProcessor:
         raise NotImplementedError
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
@@ -882,27 +875,12 @@ class InternVLProcessingInfo(BaseInternVLProcessingInfo):
 
         return max(max_frames_per_video, 1)
 
-    def get_hf_processor(
-        self,
-        *,
-        min_dynamic_patch: Optional[int] = None,
-        max_dynamic_patch: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
-        **kwargs: object,
-    ) -> InternVLProcessor:
-        if min_dynamic_patch is not None:
-            kwargs["min_dynamic_patch"] = min_dynamic_patch
-        if max_dynamic_patch is not None:
-            kwargs["max_dynamic_patch"] = max_dynamic_patch
-        if dynamic_image_size is not None:
-            kwargs["dynamic_image_size"] = dynamic_image_size
-
-        kwargs["video_token"] = self.get_video_token()
-
+    def get_hf_processor(self, **kwargs: object) -> InternVLProcessor:
         return self.ctx.init_processor(
             InternVLProcessor,
             config=self.get_hf_config(),
             tokenizer=self.get_tokenizer(),
+            video_token=self.get_video_token(),
             **kwargs,
         )
 
diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py
index 892d970aaade0..4d8aa8de0f0b1 100644
--- a/vllm/model_executor/models/keye.py
+++ b/vllm/model_executor/models/keye.py
@@ -44,8 +44,6 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.platforms import _Backend
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.config import uses_mrope
-from vllm.transformers_utils.processor import (
-    cached_image_processor_from_config)
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .interfaces import (MultiModalEmbeddings, SupportsLoRA,
@@ -980,72 +978,8 @@ class KeyeMultiModalDataParser(MultiModalDataParser):
 
 class KeyeProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_processor(
-        self,
-        *,
-        min_pixels: Optional[int] = None,
-        max_pixels: Optional[int] = None,
-        size: Optional[dict[str, int]] = None,
-        **kwargs: object,
-    ):
-        return self.ctx.get_hf_processor(
-            image_processor=self.get_image_processor(
-                min_pixels=min_pixels,
-                max_pixels=max_pixels,
-                size=size,
-            ),
-            **kwargs,
-        )
-
-    def _get_image_processor_kwargs(
-        self,
-        *,
-        min_pixels: Optional[int] = None,
-        max_pixels: Optional[int] = None,
-        size: Optional[dict[str, int]] = None,
-        **kwargs: object,
-    ):
-        if self.ctx.model_config.mm_processor_kwargs:
-            kwargs.update(self.ctx.model_config.mm_processor_kwargs)
-
-        if min_pixels is not None:
-            kwargs["min_pixels"] = min_pixels
-
-            if size is None:
-                size = {"shortest_edge": min_pixels}
-            else:
-                size["shortest_edge"] = min_pixels
-
-        if max_pixels is not None:
-            kwargs["max_pixels"] = max_pixels
-
-            if size is None:
-                size = {"longest_edge": max_pixels}
-            else:
-                size["longest_edge"] = max_pixels
-
-        if size is not None:
-            kwargs["size"] = size
-
-        return kwargs
-
-    def get_image_processor(
-        self,
-        *,
-        min_pixels: Optional[int] = None,
-        max_pixels: Optional[int] = None,
-        size: Optional[dict[str, int]] = None,
-        **kwargs: object,
-    ):
-        return cached_image_processor_from_config(
-            self.ctx.model_config,
-            **self._get_image_processor_kwargs(
-                min_pixels=min_pixels,
-                max_pixels=max_pixels,
-                size=size,
-                **kwargs,
-            ),
-        )
+    def get_image_processor(self, **kwargs: object):
+        return self.get_hf_processor(**kwargs).image_processor
 
     def get_supported_mm_limits(self, ) -> Mapping[str, Optional[int]]:
         return {"image": None, "video": None}
@@ -1246,20 +1180,6 @@ class KeyeMultiModalProcessor(BaseMultiModalProcessor[KeyeProcessingInfo]):
     def _get_data_parser(self) -> MultiModalDataParser:
         return KeyeMultiModalDataParser()
 
-    def _call_hf_processor(
-        self,
-        prompt: str,
-        mm_data: Mapping[str, object],
-        mm_kwargs: Mapping[str, object],
-        tok_kwargs: Mapping[str, object],
-    ) -> BatchFeature:
-        mm_kwargs = self.info._get_image_processor_kwargs(**mm_kwargs)
-        return self.info.ctx.call_hf_processor(
-            self.info.get_hf_processor(**mm_kwargs),
-            dict(text=prompt, **mm_data),
-            dict(**mm_kwargs, **tok_kwargs),
-        )
-
     def _get_prompt_updates(
         self,
         mm_items: MultiModalDataItems,
diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index 0126ace09e707..c863ba406422d 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -8,11 +8,9 @@ from typing import (Final, Literal, Optional, Protocol, TypedDict, TypeVar,
 
 import torch
 import torch.nn as nn
-from packaging.version import Version
 from transformers import (BatchFeature, CLIPVisionConfig, LlavaConfig,
                           PixtralVisionConfig, PretrainedConfig,
                           SiglipVisionConfig)
-from transformers import __version__ as TRANSFORMERS_VERSION
 from transformers.models.llava import LlavaProcessor
 from transformers.models.pixtral import PixtralProcessor
 
@@ -307,29 +305,14 @@ class PixtralHFMultiModalProcessor(
 
         pixel_values = processed_outputs.get("pixel_values")
         if pixel_values is not None:
-            # Before/after https://github.com/huggingface/transformers/pull/35122
-            if Version(TRANSFORMERS_VERSION) <= Version("4.48.3"):
-                images = mm_data["images"]
-                assert isinstance(images, list)
+            # Avoid padding since we need the output for each image to be
+            # independent of other images for the cache to work correctly
+            image_sizes = processed_outputs["image_sizes"]
+            assert len(pixel_values) == len(image_sizes)
 
-                # Original output: (1, num_images, C, H, W)
-                # New output: (num_images, C, H, W)
-                assert (isinstance(pixel_values, list)
-                        and len(pixel_values) == 1)
-                assert (isinstance(pixel_values[0], list)
-                        and len(pixel_values[0]) == len(images))
-
-                processed_outputs["pixel_values"] = pixel_values[0]
-            else:
-                # Avoid padding since we need the output for each image to be
-                # independent of other images for the cache to work correctly
-                image_sizes = processed_outputs["image_sizes"]
-                assert len(pixel_values) == len(image_sizes)
-
-                processed_outputs["pixel_values"] = [
-                    p[:, :h, :w]
-                    for p, (h, w) in zip(pixel_values, image_sizes)
-                ]
+            processed_outputs["pixel_values"] = [
+                p[:, :h, :w] for p, (h, w) in zip(pixel_values, image_sizes)
+            ]
 
         return processed_outputs
 
@@ -784,17 +767,10 @@ class MantisProcessingInfo(LlavaProcessingInfo):
         vision_info = self.get_vision_encoder_info()
 
         kwargs.setdefault("patch_size", vision_info.get_patch_size())
-
-        if Version(TRANSFORMERS_VERSION) < Version("4.48"):
-            # BUG: num_additional_image_tokens = 0 but treated as 1,
-            # so we set vision_feature_select_strategy to None to offset this
-            kwargs.setdefault("vision_feature_select_strategy", None)
-        else:
-            # FIXED: https://github.com/huggingface/transformers/pull/33424/files#diff-6a37acc21efcadaae622b079b2712a131131448ff64262bd219aa346aeec38faL150
-            kwargs.setdefault(
-                "vision_feature_select_strategy",
-                hf_config.vision_feature_select_strategy,
-            )
+        kwargs.setdefault(
+            "vision_feature_select_strategy",
+            hf_config.vision_feature_select_strategy,
+        )
 
         return self.ctx.get_hf_processor(LlavaProcessor, **kwargs)
 
diff --git a/vllm/model_executor/models/minicpmv.py b/vllm/model_executor/models/minicpmv.py
index 70f2d4a6420b9..e172758b2f2c5 100644
--- a/vllm/model_executor/models/minicpmv.py
+++ b/vllm/model_executor/models/minicpmv.py
@@ -331,10 +331,8 @@ class MiniCPMVProcessingInfo(BaseProcessingInfo):
 
         return hf_processor
 
-    def get_image_processor(self):
-        hf_processor = self.get_hf_processor()
-        image_processor = hf_processor.image_processor  # type: ignore
-        return image_processor
+    def get_image_processor(self, **kwargs: object):
+        return self.get_hf_processor(**kwargs).image_processor
 
     def get_model_version(self):
         return get_version_by_config(self.get_hf_config())
diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py
index dea85d320adfd..924f10d82b381 100644
--- a/vllm/model_executor/models/mllama4.py
+++ b/vllm/model_executor/models/mllama4.py
@@ -533,7 +533,7 @@ class Mllama4ProcessingInfo(BaseProcessingInfo):
 
     def get_hf_processor(self, **kwargs: object) -> Llama4Processor:
         return self.ctx.get_hf_processor(Llama4Processor,
-                                         use_fast=True,
+                                         use_fast=kwargs.pop("use_fast", True),
                                          **kwargs)
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
diff --git a/vllm/model_executor/models/nemotron_vl.py b/vllm/model_executor/models/nemotron_vl.py
index 5d0513d707413..b90cb9b39a60b 100644
--- a/vllm/model_executor/models/nemotron_vl.py
+++ b/vllm/model_executor/models/nemotron_vl.py
@@ -137,34 +137,16 @@ class NemotronVLProcessor(InternVLProcessor):
 class NemotronVLProcessingInfo(BaseInternVLProcessingInfo):
     """Processing info for Nemotron VL models."""
 
-    def get_hf_processor(
-        self,
-        *,
-        min_dynamic_patch: Optional[int] = None,
-        max_dynamic_patch: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
-        **kwargs: object,
-    ) -> NemotronVLProcessor:
-        if min_dynamic_patch is not None:
-            kwargs["min_dynamic_patch"] = min_dynamic_patch
-        if max_dynamic_patch is not None:
-            kwargs["max_dynamic_patch"] = max_dynamic_patch
-        if dynamic_image_size is not None:
-            kwargs["dynamic_image_size"] = dynamic_image_size
-
-        image_processor = self.get_image_processor()
+    def get_hf_processor(self, **kwargs: object) -> NemotronVLProcessor:
         return self.ctx.init_processor(
             NemotronVLProcessor,
             config=self.get_hf_config(),
             tokenizer=self.get_tokenizer(),
-            image_processor=image_processor,
+            image_processor=self.get_image_processor(),
             **kwargs,
         )
 
-    def get_image_processor(
-        self,
-        **kwargs: object,
-    ):
+    def get_image_processor(self, **kwargs: object):
         return cached_image_processor_from_config(
             self.ctx.model_config,
             **kwargs,
diff --git a/vllm/model_executor/models/nvlm_d.py b/vllm/model_executor/models/nvlm_d.py
index 2f7f8e437f0ad..4bea1392a6814 100644
--- a/vllm/model_executor/models/nvlm_d.py
+++ b/vllm/model_executor/models/nvlm_d.py
@@ -63,21 +63,7 @@ class NVLMProcessor(BaseInternVLProcessor):
 
 class NVLMProcessingInfo(BaseInternVLProcessingInfo):
 
-    def get_hf_processor(
-        self,
-        *,
-        min_dynamic_patch: Optional[int] = None,
-        max_dynamic_patch: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
-        **kwargs: object,
-    ) -> NVLMProcessor:
-        if min_dynamic_patch is not None:
-            kwargs["min_dynamic_patch"] = min_dynamic_patch
-        if max_dynamic_patch is not None:
-            kwargs["max_dynamic_patch"] = max_dynamic_patch
-        if dynamic_image_size is not None:
-            kwargs["dynamic_image_size"] = dynamic_image_size
-
+    def get_hf_processor(self, **kwargs: object) -> NVLMProcessor:
         return self.ctx.init_processor(
             NVLMProcessor,
             config=self.get_hf_config(),
diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py
index c8b528048b557..6b27980e0b0c3 100644
--- a/vllm/model_executor/models/ovis.py
+++ b/vllm/model_executor/models/ovis.py
@@ -25,7 +25,7 @@ import torch
 import torch.nn as nn
 from torch import Tensor
 from torch.nn.functional import gumbel_softmax, pad, softmax
-from transformers import BaseImageProcessor, BatchFeature, PretrainedConfig
+from transformers import BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.model_executor.layers.linear import ReplicatedLinear
@@ -245,11 +245,12 @@ class VisualEmbedding(torch.nn.Embedding):
 
 class OvisProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_processor(self, **kwargs):
+    def get_hf_processor(self, **kwargs: object):
         return self.ctx.get_hf_processor(
             OvisProcessor,
             image_pad_token=self.get_image_pad_token(),
             image_segment_len=self.get_image_segment_len(),
+            **kwargs,
         )
 
     def get_image_segment_len(self) -> int:
@@ -269,9 +270,6 @@ class OvisProcessingInfo(BaseProcessingInfo):
         text_model_type = hf_text_config.model_type
         return IMAGE_PAD_TOKEN_MAP.get(text_model_type)
 
-    def get_image_processor(self) -> BaseImageProcessor:
-        return self.get_hf_processor().image_processor  # type: ignore
-
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": None}
 
diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py
index aa739f22fd7bf..9ef4f8371eb3d 100644
--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -318,17 +318,6 @@ class Phi3HDImageEmbedding(Phi3ImageEmbeddingBase):
 
 class Phi3VProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_processor(
-        self,
-        *,
-        num_crops: Optional[int] = None,
-        **kwargs: object,
-    ) -> ProcessorMixin:
-        if num_crops is not None:
-            kwargs["num_crops"] = num_crops
-
-        return self.ctx.get_hf_processor(**kwargs)
-
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": None}
 
diff --git a/vllm/model_executor/models/phi4_multimodal.py b/vllm/model_executor/models/phi4_multimodal.py
index 432b707a61591..e13b8276bf17a 100644
--- a/vllm/model_executor/models/phi4_multimodal.py
+++ b/vllm/model_executor/models/phi4_multimodal.py
@@ -696,19 +696,12 @@ class Phi4MMProcessingInfo(BaseProcessingInfo):
     def get_hf_config(self) -> Phi4MultimodalConfig:
         return self.ctx.get_hf_config(Phi4MultimodalConfig)
 
-    def get_hf_processor(
-        self,
-        *,
-        dynamic_hd: Optional[int] = None,
-        **kwargs: object,
-    ) -> Phi4MMProcessor:
-        if dynamic_hd is not None:
-            kwargs["dynamic_hd"] = dynamic_hd
+    def get_hf_processor(self, **kwargs: object) -> Phi4MMProcessor:
+        return self.ctx.get_hf_processor(Phi4MMProcessor, **kwargs)
 
-        return self.ctx.get_hf_processor(**kwargs)
-
-    def get_feature_extractor(self) -> Phi4MultimodalFeatureExtractor:
-        return self.get_hf_processor().audio_processor
+    def get_feature_extractor(
+            self, **kwargs: object) -> Phi4MultimodalFeatureExtractor:
+        return self.get_hf_processor(**kwargs).audio_processor
 
     def get_image_processor(
         self,
@@ -1007,7 +1000,7 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
 
         if audio_data:
             audio_features = processed_outputs['audio_input_features']
-            sr = self.info.get_feature_extractor().sampling_rate
+            sr = self.info.get_feature_extractor(**mm_kwargs).sampling_rate
             feature_sizes = [
                 self.info.get_audio_num_frames(len(audio), sr)
                 for audio in audio_data
@@ -1043,7 +1036,8 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
         audio_token_id = tokenizer.vocab[tokenizer.audio_token]
 
         hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
-        audio_processor = self.info.get_feature_extractor()
+        audio_processor = self.info.get_feature_extractor(
+            **hf_processor_mm_kwargs)
 
         def get_image_replacement_phi4mm(item_idx: int):
             images = mm_items.get_items(
diff --git a/vllm/model_executor/models/phi4mm.py b/vllm/model_executor/models/phi4mm.py
index 9b61c3634d841..73e8446e6dea7 100644
--- a/vllm/model_executor/models/phi4mm.py
+++ b/vllm/model_executor/models/phi4mm.py
@@ -459,17 +459,6 @@ def cat_with_pad(tensors, dim, padding_value=0):
 
 class Phi4MMProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_processor(
-        self,
-        *,
-        dynamic_hd: Optional[int] = None,
-        **kwargs: object,
-    ) -> ProcessorMixin:
-        if dynamic_hd is not None:
-            kwargs["dynamic_hd"] = dynamic_hd
-
-        return self.ctx.get_hf_processor(**kwargs)
-
     @property
     def image_tokens(self) -> list[str]:
         return [f"<|image_{i+1}|>" for i in range(100)]
@@ -487,8 +476,9 @@ class Phi4MMProcessingInfo(BaseProcessingInfo):
         image_processor = processor.image_processor
         return image_processor.dynamic_hd
 
-    def get_feature_extractor(self) -> SequenceFeatureExtractor:
-        return self.get_hf_processor().audio_processor
+    def get_feature_extractor(self,
+                              **kwargs: object) -> SequenceFeatureExtractor:
+        return self.get_hf_processor(**kwargs).audio_processor
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"audio": None, "image": None}
@@ -769,7 +759,7 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
             prompt_ids = self._apply_hf_processor_tokens_only(prompt_ids)
             return BatchFeature(dict(input_ids=[prompt_ids]), tensor_type="pt")
 
-        sr = self.info.get_feature_extractor().sampling_rate
+        sr = self.info.get_feature_extractor(**mm_kwargs).sampling_rate
         if (audio_data := mm_data.get("audios", [])):
             mm_data['audios'] = [(data, sr) for data in audio_data]
 
@@ -816,7 +806,8 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
     ) -> Sequence[PromptUpdate]:
         image_tokens: list[str] = self.info.image_tokens  # type: ignore
         audio_tokens: list[str] = self.info.audio_tokens  # type: ignore
-        feature_extractor = self.info.get_feature_extractor()
+        feature_extractor = self.info.get_feature_extractor(
+            **hf_processor_mm_kwargs)
         hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
 
         def get_image_replacement_phi4mm(item_idx: int):
diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py
index c5a5c10d9509f..b9fed79c84cdd 100644
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -132,50 +132,15 @@ class Qwen2_5OmniThinkerProcessingInfo(Qwen2AudioProcessingInfo,
     def get_hf_config(self):
         return self.ctx.get_hf_config(Qwen2_5OmniConfig).thinker_config
 
-    def get_hf_processor(
-        self,
-        *,
-        sampling_rate: Optional[int] = None,
-        min_pixels: Optional[int] = None,
-        max_pixels: Optional[int] = None,
-        size: Optional[dict[str, int]] = None,
-        fps: Optional[Union[float, list[float]]] = None,
-        **kwargs: object,
-    ) -> Qwen2_5OmniProcessor:
-        if fps is not None:
-            kwargs["fps"] = fps
-
-        # Monkey patch for Transformers v4.53
-        processor_class = Qwen2_5OmniProcessor
-        if processor_class.image_processor_class != "AutoImageProcessor":
-            processor_class.image_processor_class = "AutoImageProcessor"
-        if processor_class.video_processor_class != "AutoVideoProcessor":
-            processor_class.video_processor_class = "AutoVideoProcessor"
-
-        processor = self.ctx.get_hf_processor(
-            processor_class,
-            image_processor=self.get_image_processor(min_pixels=min_pixels,
-                                                     max_pixels=max_pixels,
-                                                     size=size,
-                                                     use_fast=kwargs.get(
-                                                         "use_fast", True)),
+    def get_hf_processor(self, **kwargs: object) -> Qwen2_5OmniProcessor:
+        return self.ctx.get_hf_processor(
+            Qwen2_5OmniProcessor,
+            use_fast=kwargs.pop("use_fast", True),
             **kwargs,
         )
-        if not hasattr(processor, "audio_token"):
-            processor.audio_token = "<|AUDIO|>"
-        if not hasattr(processor, "image_token"):
-            processor.image_token = "<|IMAGE|>"
-        if not hasattr(processor, "video_token"):
-            processor.video_token = "<|VIDEO|>"
-        return processor
 
-    def get_feature_extractor(
-        self,
-        *,
-        sampling_rate: Optional[int] = None,
-        **kwargs: object,
-    ):
-        hf_processor = self.get_hf_processor(sampling_rate=sampling_rate)
+    def get_feature_extractor(self, **kwargs: object):
+        hf_processor = self.get_hf_processor(**kwargs)
         feature_extractor = hf_processor.feature_extractor  # type: ignore
         assert isinstance(feature_extractor, WhisperFeatureExtractor)
         return feature_extractor
diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 8ae096536fdc5..c4c4650f569e1 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -780,25 +780,10 @@ class Qwen2_5_VLProcessingInfo(Qwen2VLProcessingInfo):
     def get_hf_config(self):
         return self.ctx.get_hf_config(Qwen2_5_VLConfig)
 
-    def get_hf_processor(
-        self,
-        *,
-        min_pixels: Optional[int] = None,
-        max_pixels: Optional[int] = None,
-        size: Optional[dict[str, int]] = None,
-        fps: Optional[Union[float, list[float]]] = None,
-        **kwargs: object,
-    ) -> Qwen2_5_VLProcessor:
-        if fps is not None:
-            kwargs["fps"] = fps
-
+    def get_hf_processor(self, **kwargs: object) -> Qwen2_5_VLProcessor:
         return self.ctx.get_hf_processor(
             Qwen2_5_VLProcessor,
-            image_processor=self.get_image_processor(min_pixels=min_pixels,
-                                                     max_pixels=max_pixels,
-                                                     size=size,
-                                                     use_fast=kwargs.get(
-                                                         "use_fast", True)),
+            use_fast=kwargs.pop("use_fast", True),
             **kwargs,
         )
 
diff --git a/vllm/model_executor/models/qwen2_audio.py b/vllm/model_executor/models/qwen2_audio.py
index d7fec30acd8d3..3ef55cd704cf0 100644
--- a/vllm/model_executor/models/qwen2_audio.py
+++ b/vllm/model_executor/models/qwen2_audio.py
@@ -86,22 +86,12 @@ class Qwen2AudioProcessingInfo(BaseProcessingInfo):
     def get_hf_config(self):
         return self.ctx.get_hf_config(Qwen2AudioConfig)
 
-    def get_hf_processor(
-        self,
-        *,
-        # Ignored in initialization
-        sampling_rate: Optional[int] = None,
-        **kwargs: object,
-    ) -> Qwen2AudioProcessor:
+    def get_hf_processor(self, **kwargs: object) -> Qwen2AudioProcessor:
         return self.ctx.get_hf_processor(Qwen2AudioProcessor, **kwargs)
 
-    def get_feature_extractor(
-        self,
-        *,
-        # Ignored in initialization
-        sampling_rate: Optional[int] = None,
-    ) -> WhisperFeatureExtractor:
-        hf_processor = self.get_hf_processor(sampling_rate=sampling_rate)
+    def get_feature_extractor(self,
+                              **kwargs: object) -> WhisperFeatureExtractor:
+        hf_processor = self.get_hf_processor(**kwargs)
         feature_extractor = hf_processor.feature_extractor  # type: ignore
         assert isinstance(feature_extractor, WhisperFeatureExtractor)
         return feature_extractor
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index ad63bb4af4e9d..4e8ea8e449133 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -69,8 +69,6 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.platforms import _Backend, current_platform
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.config import uses_mrope
-from vllm.transformers_utils.processor import (
-    cached_image_processor_from_config)
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 
 from .interfaces import (MultiModalEmbeddings, SupportsLoRA,
@@ -752,73 +750,15 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
     def get_hf_config(self):
         return self.ctx.get_hf_config(Qwen2VLConfig)
 
-    def get_hf_processor(
-        self,
-        *,
-        min_pixels: Optional[int] = None,
-        max_pixels: Optional[int] = None,
-        size: Optional[dict[str, int]] = None,
-        **kwargs: object,
-    ) -> Qwen2VLProcessor:
+    def get_hf_processor(self, **kwargs: object) -> Qwen2VLProcessor:
         return self.ctx.get_hf_processor(
             Qwen2VLProcessor,
-            image_processor=self.get_image_processor(min_pixels=min_pixels,
-                                                     max_pixels=max_pixels,
-                                                     size=size,
-                                                     use_fast=kwargs.get(
-                                                         "use_fast", True)),
+            use_fast=kwargs.pop("use_fast", True),
             **kwargs,
         )
 
-    def _get_image_processor_kwargs(
-        self,
-        *,
-        min_pixels: Optional[int] = None,
-        max_pixels: Optional[int] = None,
-        size: Optional[dict[str, int]] = None,
-        **kwargs: object,
-    ):
-        mm_config = self.ctx.model_config.get_multimodal_config()
-        if mm_config.mm_processor_kwargs:
-            kwargs.update(mm_config.mm_processor_kwargs)
-
-        if min_pixels is not None:
-            kwargs["min_pixels"] = min_pixels
-
-            if size is None:
-                size = {"shortest_edge": min_pixels}
-            else:
-                size["shortest_edge"] = min_pixels
-
-        if max_pixels is not None:
-            kwargs["max_pixels"] = max_pixels
-
-            if size is None:
-                size = {"longest_edge": max_pixels}
-            else:
-                size["longest_edge"] = max_pixels
-
-        if size is not None:
-            kwargs["size"] = size
-
-        return kwargs
-
-    def get_image_processor(
-        self,
-        *,
-        min_pixels: Optional[int] = None,
-        max_pixels: Optional[int] = None,
-        size: Optional[dict[str, int]] = None,
-        **kwargs: object,
-    ) -> Qwen2VLImageProcessor:
-        kwargs["use_fast"] = kwargs.get("use_fast", True)
-        return cached_image_processor_from_config(
-            self.ctx.model_config,
-            **self._get_image_processor_kwargs(min_pixels=min_pixels,
-                                               max_pixels=max_pixels,
-                                               size=size,
-                                               **kwargs),
-        )
+    def get_image_processor(self, **kwargs: object) -> Qwen2VLImageProcessor:
+        return self.get_hf_processor(**kwargs).image_processor
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": None, "video": None}
@@ -1023,20 +963,6 @@ class Qwen2VLMultiModalProcessor(BaseMultiModalProcessor[Qwen2VLProcessingInfo]
     def _get_data_parser(self) -> MultiModalDataParser:
         return Qwen2VLMultiModalDataParser()
 
-    def _call_hf_processor(
-        self,
-        prompt: str,
-        mm_data: Mapping[str, object],
-        mm_kwargs: Mapping[str, object],
-        tok_kwargs: Mapping[str, object],
-    ) -> BatchFeature:
-        mm_kwargs = self.info._get_image_processor_kwargs(**mm_kwargs)
-        return self.info.ctx.call_hf_processor(
-            self.info.get_hf_processor(**mm_kwargs),
-            dict(text=prompt, **mm_data),
-            dict(**mm_kwargs, **tok_kwargs),
-        )
-
     def _get_prompt_updates(
         self,
         mm_items: MultiModalDataItems,
diff --git a/vllm/model_executor/models/skyworkr1v.py b/vllm/model_executor/models/skyworkr1v.py
index 5ae5c0bc1d5dc..c76aabcd27ccb 100644
--- a/vllm/model_executor/models/skyworkr1v.py
+++ b/vllm/model_executor/models/skyworkr1v.py
@@ -7,9 +7,8 @@
 # Copyright (c) 2025 Skywork
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
-from abc import ABC, abstractmethod
 from collections.abc import Iterable, Mapping, Sequence
-from typing import Literal, Optional, TypedDict, TypeVar, Union
+from typing import Literal, Optional, TypedDict, Union
 
 import torch
 import torch.nn as nn
@@ -232,7 +231,7 @@ def image_to_pixel_values_skyworkr1v(
     return pixel_values
 
 
-class BaseSkyworkR1VProcessor(ABC):
+class SkyworkR1VProcessor:
     """
     This model doesn't define its own HF processor,
     so we implement our own one here.
@@ -279,17 +278,18 @@ class BaseSkyworkR1VProcessor(ABC):
         self.use_thumbnail: bool = config.use_thumbnail
 
     @property
-    @abstractmethod
     def image_token_id(self) -> int:
-        raise NotImplementedError
+        return self.tokenizer.get_vocab()[IMG_CONTEXT]
 
-    @abstractmethod
     def get_image_repl(
         self,
         feature_size: int,
         num_patches: Optional[int],
     ) -> PromptUpdateDetails[str]:
-        raise NotImplementedError
+        repl_features = IMG_CONTEXT * feature_size
+        repl_full = IMG_START + repl_features + IMG_END
+
+        return PromptUpdateDetails.select_text(repl_full, IMG_CONTEXT)
 
     def resolve_min_max_num(
         self,
@@ -426,35 +426,15 @@ class BaseSkyworkR1VProcessor(ABC):
         }
 
 
-class SkyworkR1VProcessor(BaseSkyworkR1VProcessor):
+class SkyworkR1VProcessingInfo(BaseProcessingInfo):
 
-    @property
-    def image_token_id(self) -> int:
-        return self.tokenizer.get_vocab()[IMG_CONTEXT]
-
-    def get_image_repl(
-        self,
-        feature_size: int,
-        num_patches: Optional[int],
-    ) -> PromptUpdateDetails[str]:
-        repl_features = IMG_CONTEXT * feature_size
-        repl_full = IMG_START + repl_features + IMG_END
-
-        return PromptUpdateDetails.select_text(repl_full, IMG_CONTEXT)
-
-
-class BaseSkyworkR1VProcessingInfo(BaseProcessingInfo):
-
-    @abstractmethod
-    def get_hf_processor(
-        self,
-        *,
-        min_dynamic_patch: Optional[int] = None,
-        max_dynamic_patch: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
-        **kwargs: object,
-    ) -> BaseSkyworkR1VProcessor:
-        raise NotImplementedError
+    def get_hf_processor(self, **kwargs: object) -> SkyworkR1VProcessor:
+        return self.ctx.init_processor(
+            SkyworkR1VProcessor,
+            config=self.get_hf_config(),
+            tokenizer=self.get_tokenizer(),
+            **kwargs,
+        )
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": None}
@@ -464,7 +444,7 @@ class BaseSkyworkR1VProcessingInfo(BaseProcessingInfo):
         *,
         image_width: int,
         image_height: int,
-        processor: Optional[BaseSkyworkR1VProcessor],
+        processor: Optional[SkyworkR1VProcessor],
     ) -> int:
         if processor is None:
             processor = self.get_hf_processor()
@@ -500,10 +480,8 @@ class BaseSkyworkR1VProcessingInfo(BaseProcessingInfo):
         return largest_feature_pinpoint
 
 
-_I = TypeVar("_I", bound=BaseSkyworkR1VProcessingInfo)
-
-
-class SkyworkR1VDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
+class SkyworkR1VDummyInputsBuilder(
+        BaseDummyInputsBuilder[SkyworkR1VProcessingInfo]):
 
     def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
         num_images = mm_counts.get("image", 0)
@@ -527,7 +505,8 @@ class SkyworkR1VDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
         }
 
 
-class SkyworkR1VMultiModalProcessor(BaseMultiModalProcessor[_I]):
+class SkyworkR1VMultiModalProcessor(
+        BaseMultiModalProcessor[SkyworkR1VProcessingInfo]):
 
     def _call_hf_processor(
         self,
@@ -617,31 +596,6 @@ class SkyworkR1VMultiModalProcessor(BaseMultiModalProcessor[_I]):
         ]
 
 
-class SkyworkR1VProcessingInfo(BaseSkyworkR1VProcessingInfo):
-
-    def get_hf_processor(
-        self,
-        *,
-        min_dynamic_patch: Optional[int] = None,
-        max_dynamic_patch: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
-        **kwargs: object,
-    ) -> SkyworkR1VProcessor:
-        if min_dynamic_patch is not None:
-            kwargs["min_dynamic_patch"] = min_dynamic_patch
-        if max_dynamic_patch is not None:
-            kwargs["max_dynamic_patch"] = max_dynamic_patch
-        if dynamic_image_size is not None:
-            kwargs["dynamic_image_size"] = dynamic_image_size
-
-        return self.ctx.init_processor(
-            SkyworkR1VProcessor,
-            config=self.get_hf_config(),
-            tokenizer=self.get_tokenizer(),
-            **kwargs,
-        )
-
-
 @MULTIMODAL_REGISTRY.register_processor(
     SkyworkR1VMultiModalProcessor,
     info=SkyworkR1VProcessingInfo,
diff --git a/vllm/model_executor/models/smolvlm.py b/vllm/model_executor/models/smolvlm.py
index 0f22ba5b406ce..2adfad67152b3 100644
--- a/vllm/model_executor/models/smolvlm.py
+++ b/vllm/model_executor/models/smolvlm.py
@@ -19,15 +19,7 @@ from .idefics3 import Idefics3ProcessingInfo
 
 class SmolVLMProcessingInfo(Idefics3ProcessingInfo):
 
-    def get_hf_processor(
-        self,
-        *,
-        max_image_size: Optional[dict[str, int]] = None,
-        **kwargs: object,
-    ) -> SmolVLMProcessor:
-        if max_image_size is not None:
-            kwargs["max_image_size"] = max_image_size
-
+    def get_hf_processor(self, **kwargs: object) -> SmolVLMProcessor:
         return self.ctx.get_hf_processor(SmolVLMProcessor, **kwargs)
 
     def _get_image_token(
diff --git a/vllm/model_executor/models/tarsier.py b/vllm/model_executor/models/tarsier.py
index 979d789b330cf..70cf5e95a54e1 100644
--- a/vllm/model_executor/models/tarsier.py
+++ b/vllm/model_executor/models/tarsier.py
@@ -178,13 +178,11 @@ class TarsierProcessingInfo(BaseProcessingInfo):
         return get_vision_encoder_info(self.get_hf_config())
 
     def get_hf_processor(self, **kwargs: object) -> TarsierProcessor:
-        hf_processor = self.ctx.get_hf_processor(TarsierProcessor, **kwargs)
-        # Patch for patch_size if needed (copied from vLLM LLaVA)
-        if hasattr(hf_processor,
-                   'patch_size') and hf_processor.patch_size is None:
-            patch_size = self.get_vision_encoder_info().get_patch_size()
-            hf_processor.patch_size = patch_size
-        return hf_processor
+        vision_info = self.get_vision_encoder_info()
+
+        kwargs.setdefault("patch_size", vision_info.get_patch_size())
+
+        return self.ctx.get_hf_processor(TarsierProcessor, **kwargs)
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"image": None}
diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py
index 8cd95605cdfae..e67548800c354 100644
--- a/vllm/model_executor/models/transformers.py
+++ b/vllm/model_executor/models/transformers.py
@@ -48,7 +48,6 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
                                         BaseProcessingInfo)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.processor import cached_get_processor
 from vllm.utils import is_list_of
 
 from .interfaces import (SupportsLoRA, SupportsMultiModal, SupportsPP,
@@ -189,10 +188,6 @@ class MultiModalProcessingInfo(BaseProcessingInfo):
         image_tokens = mm_tokens["num_image_tokens"][0]
         return image_tokens
 
-    def get_hf_processor(self):
-        processor = cached_get_processor(self.ctx.model_config.model)
-        return processor
-
     def get_max_image_size(self):
         return 10_000, 10_000  # hardcode for arbitrary very large size
 
diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py
index a4569ccd5a845..bef34c1be49fe 100644
--- a/vllm/model_executor/models/ultravox.py
+++ b/vllm/model_executor/models/ultravox.py
@@ -71,13 +71,7 @@ UltravoxAudioInputs = Union[UltravoxAudioFeatureInputs,
 
 class UltravoxProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_processor(
-        self,
-        *,
-        # Ignored in initialization
-        sampling_rate: Optional[int] = None,
-        **kwargs: object,
-    ) -> ProcessorMixin:
+    def get_hf_processor(self, **kwargs: object) -> ProcessorMixin:
         config = self.ctx.model_config.hf_config
         hf_processor = self.ctx.get_hf_processor(**kwargs)
 
@@ -89,13 +83,9 @@ class UltravoxProcessingInfo(BaseProcessingInfo):
 
         return hf_processor
 
-    def get_feature_extractor(
-        self,
-        *,
-        # Ignored in initialization
-        sampling_rate: Optional[int] = None,
-    ) -> WhisperFeatureExtractor:
-        hf_processor = self.get_hf_processor(sampling_rate=sampling_rate)
+    def get_feature_extractor(self,
+                              **kwargs: object) -> WhisperFeatureExtractor:
+        hf_processor = self.get_hf_processor(**kwargs)
         audio_processor = hf_processor.audio_processor  # type: ignore
         feature_extractor = audio_processor.feature_extractor  # type: ignore
         assert isinstance(feature_extractor, WhisperFeatureExtractor)
@@ -156,7 +146,7 @@ class UltravoxMultiModalProcessor(
         audios = mm_data.pop("audios", [])
         assert isinstance(audios, list)
 
-        feature_extractor = self.info.get_feature_extractor()
+        feature_extractor = self.info.get_feature_extractor(**mm_kwargs)
         mm_kwargs = dict(
             **mm_kwargs,
             sampling_rate=feature_extractor.sampling_rate,
diff --git a/vllm/model_executor/models/whisper.py b/vllm/model_executor/models/whisper.py
index d7bafb9ef84d9..ca02ecd828ba3 100644
--- a/vllm/model_executor/models/whisper.py
+++ b/vllm/model_executor/models/whisper.py
@@ -623,23 +623,22 @@ class WhisperProcessingInfo(BaseProcessingInfo):
     def get_hf_config(self) -> WhisperConfig:
         return self.ctx.get_hf_config(WhisperConfig)
 
-    def get_hf_processor(self,
-                         sampling_rate: Optional[int] = None
-                         ) -> WhisperProcessor:
-        # HACK: Transformers 4.53.0 has issue with whisper tokenizer to
+    def get_hf_processor(self, **kwargs: object) -> WhisperProcessor:
+        # HACK: Transformers 4.53.2 has issue with whisper tokenizer to
         # initialize processor. We use a monkeypatch to fix it here.
         # See: https://github.com/vllm-project/vllm/issues/20224
         processor_class = WhisperProcessor
         tokenizer_class = ("WhisperTokenizer", "WhisperTokenizerFast")
         if processor_class.tokenizer_class != tokenizer_class:
             processor_class.tokenizer_class = tokenizer_class
-        return self.ctx.get_hf_processor(processor_class)
+        return self.ctx.get_hf_processor(processor_class, **kwargs)
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
         return {"audio": 1}
 
-    def get_feature_extractor(self) -> WhisperFeatureExtractor:
-        hf_processor = self.get_hf_processor()
+    def get_feature_extractor(self,
+                              **kwargs: object) -> WhisperFeatureExtractor:
+        hf_processor = self.get_hf_processor(**kwargs)
         feature_extractor = hf_processor.feature_extractor  # type: ignore
         assert isinstance(feature_extractor, WhisperFeatureExtractor)
         return feature_extractor
@@ -702,7 +701,7 @@ class WhisperMultiModalProcessor(
         tok_kwargs: Mapping[str, object],
     ) -> BatchFeature:
         if mm_data:
-            feature_extractor = self.info.get_feature_extractor()
+            feature_extractor = self.info.get_feature_extractor(**mm_kwargs)
             mm_data = dict(audio=mm_data.pop("audios"))
             mm_kwargs = dict(
                 **mm_kwargs,
diff --git a/vllm/transformers_utils/processor.py b/vllm/transformers_utils/processor.py
index 70cd08263d372..a630d940b2578 100644
--- a/vllm/transformers_utils/processor.py
+++ b/vllm/transformers_utils/processor.py
@@ -4,9 +4,15 @@
 from functools import lru_cache
 from typing import TYPE_CHECKING, Any, Optional, Union, cast
 
+from transformers import (AutoFeatureExtractor, AutoImageProcessor,
+                          AutoProcessor)
+from transformers.feature_extraction_utils import FeatureExtractionMixin
+from transformers.image_processing_utils import BaseImageProcessor
 from transformers.processing_utils import ProcessorMixin
 from typing_extensions import TypeVar
 
+from vllm.utils import get_allowed_kwarg_only_overrides
+
 if TYPE_CHECKING:
     from vllm.config import ModelConfig
 
@@ -33,23 +39,42 @@ class HashableList(list):
         return hash(tuple(self))
 
 
-def _merge_mm_kwargs(model_config: "ModelConfig", **kwargs):
-    mm_config = model_config.get_multimodal_config()
-    base_kwargs = mm_config.mm_processor_kwargs
-    if base_kwargs is None:
-        base_kwargs = {}
+def _get_processor_factory_fn(processor_cls: Union[type, tuple[type, ...]]):
+    if isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin:
+        return AutoProcessor.from_pretrained
+    if hasattr(processor_cls, "from_pretrained"):
+        return processor_cls.from_pretrained
 
-    merged_kwargs = {**base_kwargs, **kwargs}
+    return processor_cls
+
+
+def _merge_mm_kwargs(
+    model_config: "ModelConfig",
+    processor_cls: Union[type, tuple[type, ...]],
+    /,
+    **kwargs,
+):
+    mm_config = model_config.get_multimodal_config()
+    merged_kwargs = mm_config.merge_mm_processor_kwargs(kwargs)
+
+    factory = _get_processor_factory_fn(processor_cls)
+    allowed_kwargs = get_allowed_kwarg_only_overrides(
+        factory,
+        merged_kwargs,
+        requires_kw_only=False,
+        allow_var_kwargs=True,
+    )
 
     # NOTE: Pythonic dict is not hashable and will raise unhashable type
     # error when calling `cached_get_processor`, therefore we need to
     # wrap it to a hashable dict.
-    for key, value in merged_kwargs.items():
+    for key, value in allowed_kwargs.items():
         if isinstance(value, dict):
-            merged_kwargs[key] = HashableDict(value)
+            allowed_kwargs[key] = HashableDict(value)
         if isinstance(value, list):
-            merged_kwargs[key] = HashableList(value)
-    return merged_kwargs
+            allowed_kwargs[key] = HashableList(value)
+
+    return allowed_kwargs
 
 
 def get_processor(
@@ -61,21 +86,29 @@ def get_processor(
     **kwargs: Any,
 ) -> _P:
     """Load a processor for the given model name via HuggingFace."""
-    # don't put this import at the top level
-    # it will call torch.cuda.device_count()
-    from transformers import AutoProcessor
-
-    processor_factory = (AutoProcessor if processor_cls == ProcessorMixin or
-                         isinstance(processor_cls, tuple) else processor_cls)
+    if revision is None:
+        revision = "main"
 
     try:
-        processor = processor_factory.from_pretrained(
-            processor_name,
-            *args,
-            revision=revision,
-            trust_remote_code=trust_remote_code,
-            **kwargs,
-        )
+        if isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin:
+            processor = AutoProcessor.from_pretrained(
+                processor_name,
+                *args,
+                revision=revision,
+                trust_remote_code=trust_remote_code,
+                **kwargs,
+            )
+        elif issubclass(processor_cls, ProcessorMixin):
+            processor = processor_cls.from_pretrained(
+                processor_name,
+                *args,
+                revision=revision,
+                trust_remote_code=trust_remote_code,
+                **kwargs,
+            )
+        else:
+            # Processors that are standalone classes unrelated to HF
+            processor = processor_cls(*args, **kwargs)
     except ValueError as e:
         # If the error pertains to the processor class not existing or not
         # currently being imported, suggest using the --trust-remote-code flag.
@@ -112,7 +145,7 @@ def cached_processor_from_config(
         revision=model_config.revision,
         trust_remote_code=model_config.trust_remote_code,
         processor_cls=processor_cls,  # type: ignore[arg-type]
-        **_merge_mm_kwargs(model_config, **kwargs),
+        **_merge_mm_kwargs(model_config, processor_cls, **kwargs),
     )
 
 
@@ -125,10 +158,6 @@ def get_feature_extractor(
 ):
     """Load an audio feature extractor for the given model name 
     via HuggingFace."""
-    # don't put this import at the top level
-    # it will call torch.cuda.device_count()
-    from transformers import AutoFeatureExtractor
-    from transformers.feature_extraction_utils import FeatureExtractionMixin
     try:
         feature_extractor = AutoFeatureExtractor.from_pretrained(
             processor_name,
@@ -164,7 +193,7 @@ def cached_feature_extractor_from_config(
         model_config.model,
         revision=model_config.revision,
         trust_remote_code=model_config.trust_remote_code,
-        **_merge_mm_kwargs(model_config, **kwargs),
+        **_merge_mm_kwargs(model_config, AutoFeatureExtractor, **kwargs),
     )
 
 
@@ -176,11 +205,6 @@ def get_image_processor(
     **kwargs: Any,
 ):
     """Load an image processor for the given model name via HuggingFace."""
-    # don't put this import at the top level
-    # it will call torch.cuda.device_count()
-    from transformers import AutoImageProcessor
-    from transformers.image_processing_utils import BaseImageProcessor
-
     try:
         processor = AutoImageProcessor.from_pretrained(
             processor_name,
@@ -217,5 +241,5 @@ def cached_image_processor_from_config(
         model_config.model,
         revision=model_config.revision,
         trust_remote_code=model_config.trust_remote_code,
-        **_merge_mm_kwargs(model_config, **kwargs),
+        **_merge_mm_kwargs(model_config, AutoImageProcessor, **kwargs),
     )
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index ae978c855a8e5..a7f579b0c9c2d 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -2010,49 +2010,6 @@ def supports_kw(
     return False
 
 
-def resolve_mm_processor_kwargs(
-    init_kwargs: Optional[Mapping[str, object]],
-    inference_kwargs: Optional[Mapping[str, object]],
-    callable: Callable[..., object],
-    *,
-    requires_kw_only: bool = True,
-    allow_var_kwargs: bool = False,
-) -> dict[str, Any]:
-    """Applies filtering to eliminate invalid mm_processor_kwargs, i.e.,
-    those who are not explicit keywords to the given callable (of one is
-    given; otherwise no filtering is done), then merges the kwarg dicts,
-    giving priority to inference_kwargs if there are any collisions.
-
-    In the case that no kwarg overrides are provided, returns an empty
-    dict so that it can still be kwarg expanded into the callable later on.
-
-    If allow_var_kwargs=True, allows for things that can be expanded into
-    kwargs as long as they aren't naming collision for var_kwargs or potential
-    positional arguments.
-    """
-    # Filter inference time multimodal processor kwargs provided
-    runtime_mm_kwargs = get_allowed_kwarg_only_overrides(
-        callable,
-        overrides=inference_kwargs,
-        requires_kw_only=requires_kw_only,
-        allow_var_kwargs=allow_var_kwargs,
-    )
-
-    # Filter init time multimodal processor kwargs provided
-    init_mm_kwargs = get_allowed_kwarg_only_overrides(
-        callable,
-        overrides=init_kwargs,
-        requires_kw_only=requires_kw_only,
-        allow_var_kwargs=allow_var_kwargs,
-    )
-
-    # Merge the final processor kwargs, prioritizing inference
-    # time values over the initialization time values.
-    mm_processor_kwargs = {**init_mm_kwargs, **runtime_mm_kwargs}
-
-    return mm_processor_kwargs
-
-
 def get_allowed_kwarg_only_overrides(
     callable: Callable[..., object],
     overrides: Optional[Mapping[str, object]],

From e1a7fe4af5e9c287501c648e64956a08705af86a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Seznec?= <mickael@mistral.ai>
Date: Fri, 1 Aug 2025 07:45:02 +0200
Subject: [PATCH 106/224] [BugFix] fix: aot passes kvcache dtype information
 (#19750)

Signed-off-by: Mickael Seznec <mickael@mistral.ai>
---
 vllm/v1/attention/backends/flash_attn.py | 25 ++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index 4c2a6c6b985b2..3f9afa67aef70 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -99,6 +99,13 @@ class FlashAttentionBackend(AttentionBackend):
             raise ValueError(f"Unknown cache layout format {cache_layout}.")
         return stride_order
 
+    @staticmethod
+    def get_fp8_dtype_for_flashattn(kv_cache_dtype: str) -> torch.dtype:
+        if kv_cache_dtype in ("fp8", "fp8_e4m3"):
+            return torch.float8_e4m3fn
+        else:
+            raise ValueError(f"Unrecognized FP8 dtype: {kv_cache_dtype}")
+
 
 @dataclass
 class FlashAttentionMetadata:
@@ -161,6 +168,7 @@ class FlashAttentionMetadataBuilder(
             self.parallel_config)
         self.num_heads_kv = self.model_config.get_num_kv_heads(
             self.parallel_config)
+        self.kv_cache_dtype = kv_cache_spec.dtype
         self.headdim = self.model_config.get_head_size()
         self.block_size = kv_cache_spec.block_size
 
@@ -239,17 +247,24 @@ class FlashAttentionMetadataBuilder(
 
         def schedule(batch_size, cu_query_lens, max_query_len, seqlens,
                      max_seq_len, causal):
+            cache_dtype = self.cache_config.cache_dtype
+            if cache_dtype.startswith("fp8"):
+                qkv_dtype = FlashAttentionBackend.get_fp8_dtype_for_flashattn(
+                    cache_dtype)
+            else:
+                qkv_dtype = self.kv_cache_dtype
             if aot_schedule:
                 return get_scheduler_metadata(
                     batch_size=batch_size,
                     max_seqlen_q=max_query_len,
                     max_seqlen_k=max_seq_len,
-                    cache_seqlens=seqlens,
                     num_heads_q=self.num_heads_q,
                     num_heads_kv=self.num_heads_kv,
                     headdim=self.headdim,
-                    page_size=self.block_size,
+                    cache_seqlens=seqlens,
+                    qkv_dtype=qkv_dtype,
                     cu_seqlens_q=cu_query_lens,
+                    page_size=self.block_size,
                     causal=causal,
                     window_size=self.aot_sliding_window,
                     num_splits=self.max_num_splits,
@@ -474,8 +489,10 @@ class FlashAttentionImpl(AttentionImpl):
             )
 
         if self.kv_cache_dtype.startswith("fp8"):
-            key_cache = key_cache.view(torch.float8_e4m3fn)
-            value_cache = value_cache.view(torch.float8_e4m3fn)
+            dtype = FlashAttentionBackend.get_fp8_dtype_for_flashattn(
+                self.kv_cache_dtype)
+            key_cache = key_cache.view(dtype)
+            value_cache = value_cache.view(dtype)
             num_tokens, num_heads, head_size = query.shape
             query, _ = ops.scaled_fp8_quant(
                 query.reshape(

From 0f46a780d4f53b8564a37370f9f068cdf4e69604 Mon Sep 17 00:00:00 2001
From: Kyle Sayers <kylesayrs@gmail.com>
Date: Fri, 1 Aug 2025 01:45:15 -0400
Subject: [PATCH 107/224] [Model] [Quantization] Support quantization for
 Gemma3n (#21974)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
---
 vllm/model_executor/models/gemma3n.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/models/gemma3n.py b/vllm/model_executor/models/gemma3n.py
index a58b32793dbef..e16c03c8d3b57 100644
--- a/vllm/model_executor/models/gemma3n.py
+++ b/vllm/model_executor/models/gemma3n.py
@@ -46,6 +46,7 @@ from vllm.model_executor.model_loader.weight_utils import (
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
 
+from .interfaces import SupportsQuant
 from .utils import (AutoWeightsLoader, extract_layer_index,
                     is_pp_missing_parameter, make_layers, maybe_prefix)
 
@@ -68,6 +69,7 @@ class Gemma3nAltUp(nn.Module):
         altup_num_inputs: int,
         altup_coef_clip: float,
         altup_active_idx: int,
+        quant_config: QuantizationConfig,
         prefix: str,
     ):
         super().__init__()
@@ -80,6 +82,7 @@ class Gemma3nAltUp(nn.Module):
             altup_num_inputs,
             altup_num_inputs,
             bias=False,
+            quant_config=quant_config,
             prefix=f"{prefix}.correction_coefs",
             return_bias=False,
         )
@@ -87,6 +90,7 @@ class Gemma3nAltUp(nn.Module):
             altup_num_inputs,
             altup_num_inputs**2,
             bias=False,
+            quant_config=quant_config,
             prefix=f"{prefix}.prediction_coefs",
             return_bias=False,
         )
@@ -94,6 +98,7 @@ class Gemma3nAltUp(nn.Module):
             hidden_size,
             altup_num_inputs,
             bias=False,
+            quant_config=quant_config,
             prefix=f"{prefix}.modality_router",
             return_bias=False,
         )
@@ -400,6 +405,7 @@ class Gemma3nDecoderLayer(nn.Module):
             altup_num_inputs=config.altup_num_inputs,
             altup_coef_clip=config.altup_coef_clip,
             altup_active_idx=config.altup_active_idx,
+            quant_config=quant_config,
             prefix=f"{prefix}.altup",
         )
         self.self_attn = Gemma3nAttention(
@@ -527,7 +533,7 @@ class Gemma3nDecoderLayer(nn.Module):
 
 
 @support_torch_compile
-class Gemma3nTextModel(nn.Module):
+class Gemma3nTextModel(nn.Module, SupportsQuant):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -540,6 +546,7 @@ class Gemma3nTextModel(nn.Module):
         self.embed_tokens = VocabParallelEmbedding(
             config.vocab_size,
             config.hidden_size,
+            quant_config=quant_config,
             prefix=f"{prefix}.embed_tokens",
         )
         self.embed_scale = torch.tensor(
@@ -549,6 +556,7 @@ class Gemma3nTextModel(nn.Module):
         self.embed_tokens_per_layer = VocabParallelEmbedding(
             config.vocab_size_per_layer_input,
             config.num_hidden_layers * config.hidden_size_per_layer_input,
+            quant_config=quant_config,
             prefix=f"{prefix}.per_layer_embed_tokens",
         )
         self.embed_scale_per_layer = torch.tensor(
@@ -582,7 +590,7 @@ class Gemma3nTextModel(nn.Module):
                 gather_output=True,
                 return_bias=False,
                 quant_config=quant_config,
-                prefix=f"{prefix}.{idx-1}.altup_projections",
+                prefix=f"{prefix}.altup_projections.{idx-1}",
             ) for idx in range(1, self.config.altup_num_inputs)
         ])
         self.altup_unembed_projections = nn.ModuleList([
@@ -593,7 +601,7 @@ class Gemma3nTextModel(nn.Module):
                 gather_output=True,
                 return_bias=False,
                 quant_config=quant_config,
-                prefix=f"{prefix}.{idx-1}.altup_unembed_projections",
+                prefix=f"{prefix}.altup_unembed_projections.{idx-1}",
             ) for idx in range(1, self.config.altup_num_inputs)
         ])
 
@@ -774,7 +782,7 @@ class Gemma3nModel(nn.Module):
                                    **kwargs)
 
 
-class Gemma3nForConditionalGeneration(nn.Module):
+class Gemma3nForConditionalGeneration(nn.Module, SupportsQuant):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",

From 61dcc280faf305778c0c44597e823f40063aaed6 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Fri, 1 Aug 2025 14:10:56 +0800
Subject: [PATCH 108/224] [Doc] Add Voxtral to Supported Models page (#22059)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 docs/models/supported_models.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index f5d9e3b22f2a6..56c77a1e5f118 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -713,6 +713,7 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
 | Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/distributed_serving.md) | [V1](gh-issue:8779) |
 |--------------|--------|-------------------|----------------------|---------------------------|---------------------|
 | `WhisperForConditionalGeneration` | Whisper | `openai/whisper-small`, `openai/whisper-large-v3-turbo`, etc. | | | |
+| `VoxtralForConditionalGeneration` | Voxtral (Mistral format) | `mistralai/Voxtral-Mini-3B-2507`, `mistralai/Voxtral-Small-24B-2507`, etc. | | ✅︎ | ✅︎ |
 
 ### Pooling Models
 

From 53d7c39271aeb0568afcae337396a972e1848586 Mon Sep 17 00:00:00 2001
From: Aviad Rossmann <aviadr@neureality.ai>
Date: Fri, 1 Aug 2025 09:23:18 +0300
Subject: [PATCH 109/224] Update sampling_metadata.py (#21937)

Signed-off-by: Aviad Rossmann <aviadr@neureality.ai>
---
 vllm/model_executor/sampling_metadata.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py
index 56f0f0984bfa0..66bcf1c4bfe50 100644
--- a/vllm/model_executor/sampling_metadata.py
+++ b/vllm/model_executor/sampling_metadata.py
@@ -539,37 +539,37 @@ class SamplingTensors:
         temperatures_t = torch.tensor(
             temperatures,
             device="cpu",
-            dtype=dtype,
+            dtype=torch.float32,
             pin_memory=pin_memory,
         )
         top_ps_t = torch.tensor(
             top_ps,
             device="cpu",
-            dtype=dtype,
+            dtype=torch.float32,
             pin_memory=pin_memory,
         )
         min_ps_t = torch.tensor(
             min_ps,
             device="cpu",
-            dtype=dtype,
+            dtype=torch.float32,
             pin_memory=pin_memory,
         )
         presence_penalties_t = torch.tensor(
             presence_penalties,
             device="cpu",
-            dtype=dtype,
+            dtype=torch.float32,
             pin_memory=pin_memory,
         )
         frequency_penalties_t = torch.tensor(
             frequency_penalties,
             device="cpu",
-            dtype=dtype,
+            dtype=torch.float32,
             pin_memory=pin_memory,
         )
         repetition_penalties_t = torch.tensor(
             repetition_penalties,
             device="cpu",
-            dtype=dtype,
+            dtype=torch.float32,
             pin_memory=pin_memory,
         )
         top_ks_t = torch.tensor(

From 79731a79f09dc7bbe34dc8afbe8ef2242fb94a05 Mon Sep 17 00:00:00 2001
From: Hongsheng Liu <liuhongsheng4@huawei.com>
Date: Fri, 1 Aug 2025 15:01:22 +0800
Subject: [PATCH 110/224] [Doc] Fix a syntax error of example code in
 structured_outputs.md (#22045)

Signed-off-by: wangzi <3220100013@zju.edu.cn>
Co-authored-by: wangzi <3220100013@zju.edu.cn>
---
 docs/features/structured_outputs.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/features/structured_outputs.md b/docs/features/structured_outputs.md
index 4f737afa80f55..8a934d406f382 100644
--- a/docs/features/structured_outputs.md
+++ b/docs/features/structured_outputs.md
@@ -103,7 +103,7 @@ The next example shows how to use the `guided_json` parameter with a Pydantic mo
                 "content": "Generate a JSON with the brand, model and car_type of the most iconic car from the 90's",
             }
         ],
-        "response_format": {
+        response_format={
             "type": "json_schema",
             "json_schema": {
                 "name": "car-description",

From b4e081cb150797b12039cc1232205dbb25ca0206 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Fri, 1 Aug 2025 15:03:56 +0800
Subject: [PATCH 111/224] [Bugfix] Disable multi-modal preprocessor cache for
 DP (#21896)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/config.py                |  6 ++++++
 vllm/engine/arg_utils.py      | 12 ++++++++++++
 vllm/entrypoints/cli/serve.py |  5 +++--
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 9d5739ca11efd..93daab7d6ae97 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -871,6 +871,12 @@ class ModelConfig:
 
         return None
 
+    def set_disable_mm_preprocessor_cache(self, value: bool) -> None:
+        mm_config = self.get_multimodal_config()
+
+        self.disable_mm_preprocessor_cache = value
+        mm_config.disable_mm_preprocessor_cache = value
+
     def _get_encoder_config(self):
         return get_sentence_transformer_tokenizer_config(
             self.model, self.revision)
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index c36c79c69317e..78272d983eaf5 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1197,6 +1197,18 @@ class EngineArgs:
             enable_multimodal_encoder_data_parallel,
         )
 
+        supports_mm_preprocessor_cache = (self.data_parallel_size == 1
+                                          or data_parallel_external_lb)
+        if (not supports_mm_preprocessor_cache
+                and model_config.is_multimodal_model
+                and not model_config.disable_mm_preprocessor_cache):
+            logger.warning(
+                "Multi-modal preprocessor cache is not compatible "
+                "with data parallelism when there does not exist a "
+                "one-to-one correspondance between API process and "
+                "EngineCore process, so the cache will be disabled.")
+            model_config.set_disable_mm_preprocessor_cache(True)
+
         speculative_config = self.create_speculative_config(
             target_model_config=model_config,
             target_parallel_config=parallel_config,
diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py
index 7dcba2cccdb52..bdbe71b832f4f 100644
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -167,8 +167,9 @@ def run_multi_api_server(args: argparse.Namespace):
 
         if model_config.is_multimodal_model and not (
                 orig_disable_mm_preprocessor_cache):
-            logger.warning("Multi-model preprocessor cache will be disabled "
-                           "for api_server_count > 1")
+            logger.warning(
+                "Multi-modal preprocessor cache is not compatible "
+                "with api_server_count > 1, so the cache will be disabled.")
 
     executor_class = Executor.get_class(vllm_config)
     log_stats = not engine_args.disable_log_stats

From e0f63e4a3509a9323339eee67c96ac3c93d15923 Mon Sep 17 00:00:00 2001
From: Zebing Lin <linzebing1995@gmail.com>
Date: Fri, 1 Aug 2025 03:23:29 -0400
Subject: [PATCH 112/224] [Core] Avoid repeated len(block_token_ids) check in
 hash_request_tokens (#21781)

Signed-off-by: linzebing <linzebing1995@gmail.com>
---
 vllm/v1/core/kv_cache_utils.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 25520eb655111..eab1560b1a18c 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -567,12 +567,10 @@ def hash_request_tokens(hash_function: Any, block_size: int,
 
     ret = []
     parent_block_hash_value = None
-    for start in range(0, len(token_ids), block_size):
+    # Only full blocks will be hashed
+    for start in range(0, len(token_ids) - block_size + 1, block_size):
         end = start + block_size
         block_token_ids = token_ids[start:end]
-        # Do not hash the block if it is not full.
-        if len(block_token_ids) < block_size:
-            break
 
         if req_need_extra_keys:
             # MM and LoRA requests need extra keys for block-hash computation.

From 98df153abfcc443218aacfe61b3fd5abe2b88142 Mon Sep 17 00:00:00 2001
From: Sungyoon Jeong <157349761+n0gu-furiosa@users.noreply.github.com>
Date: Fri, 1 Aug 2025 16:54:17 +0900
Subject: [PATCH 113/224] [Frontend] Align tool_choice="required" behavior with
 OpenAI when tools is empty (#21052)

Signed-off-by: Sungyoon Jeong <sungyoon.jeong@furiosa.ai>
---
 vllm/entrypoints/openai/protocol.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index b6b3bf3f530e3..d77aee345843c 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -859,6 +859,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
                     'are supported.'
                 )
 
+            # if tool_choice is "required" but the "tools" list is empty,
+            # override the data to behave like "none" to align with
+            # OpenAI’s behavior.
+            if data["tool_choice"] == "required" and isinstance(
+                    data["tools"], list) and len(data["tools"]) == 0:
+                data["tool_choice"] = "none"
+                del data["tools"]
+                return data
+
             # ensure that if "tool_choice" is specified as an object,
             # it matches a valid tool
             correct_usage_message = 'Correct usage: `{"type": "function",' \

From da31f6ad3dacea8579adfb36d64d28759dc5c095 Mon Sep 17 00:00:00 2001
From: Simon Mo <simon.mo@hey.com>
Date: Fri, 1 Aug 2025 01:26:24 -0700
Subject: [PATCH 114/224] Revert precompile wheel changes (#22055)

---
 docker/Dockerfile     |  27 +++----
 requirements/test.txt |  24 ++----
 setup.py              | 182 ++++++++++++++++++++----------------------
 vllm/envs.py          |  11 +--
 4 files changed, 107 insertions(+), 137 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 413151b3edb00..0d6afca74e867 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -206,7 +206,16 @@ ARG SCCACHE_REGION_NAME=us-west-2
 ARG SCCACHE_S3_NO_CREDENTIALS=0
 
 # Flag to control whether to use pre-built vLLM wheels
-ARG VLLM_USE_PRECOMPILED=""
+ARG VLLM_USE_PRECOMPILED
+# TODO: in setup.py VLLM_USE_PRECOMPILED is sensitive to truthiness, it will take =0 as "true", this should be fixed
+ENV VLLM_USE_PRECOMPILED=""
+RUN if [ "${VLLM_USE_PRECOMPILED}" = "1" ]; then \
+        export VLLM_USE_PRECOMPILED=1 && \
+        echo "Using precompiled wheels"; \
+    else \
+        unset VLLM_USE_PRECOMPILED && \
+        echo "Leaving VLLM_USE_PRECOMPILED unset to build wheels from source"; \
+    fi
 
 # if USE_SCCACHE is set, use sccache to speed up compilation
 RUN --mount=type=cache,target=/root/.cache/uv \
@@ -223,8 +232,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
         && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
         && export SCCACHE_IDLE_TIMEOUT=0 \
         && export CMAKE_BUILD_TYPE=Release \
-        && export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
-        && export VLLM_DOCKER_BUILD_CONTEXT=1 \
         && sccache --show-stats \
         && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
         && sccache --show-stats; \
@@ -238,22 +245,9 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
         # Clean any existing CMake artifacts
         rm -rf .deps && \
         mkdir -p .deps && \
-        export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
-        export VLLM_DOCKER_BUILD_CONTEXT=1 && \
         python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
     fi
 
-# When using precompiled wheels, keep only the newest manylinux1 wheel and delete others
-RUN if [ "$VLLM_USE_PRECOMPILED" = "1" ]; then \
-        echo "Cleaning up extra wheels in dist/..." && \
-        # Identify the most recent manylinux1_x86_64 wheel
-        KEEP_WHEEL=$(ls -t dist/*manylinux1_x86_64.whl 2>/dev/null | head -n1) && \
-        if [ -n "$KEEP_WHEEL" ]; then \
-            echo "Keeping wheel: $KEEP_WHEEL"; \
-            find dist/ -type f -name "*.whl" ! -path "${KEEP_WHEEL}" -delete; \
-        fi; \
-    fi
-
 # Check the size of the wheel if RUN_WHEEL_CHECK is true
 COPY .buildkite/check-wheel-size.py check-wheel-size.py
 # sync the default value with .buildkite/check-wheel-size.py
@@ -369,7 +363,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     fi
 
 # Install vllm wheel first, so that torch etc will be installed.
-# !bang
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system dist/*.whl --verbose \
diff --git a/requirements/test.txt b/requirements/test.txt
index 4aaca2afea266..d45048aae5809 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -22,7 +22,9 @@ aiohttp==3.10.11
 aiohttp-cors==0.8.1
     # via ray
 aiosignal==1.3.1
-    # via aiohttp
+    # via
+    #   aiohttp
+    #   ray
 albucore==0.0.16
     # via terratorch
 albumentations==1.4.6
@@ -137,7 +139,7 @@ contourpy==1.3.0
     # via matplotlib
 cramjam==2.9.0
     # via fastparquet
-cupy-cuda12x==13.5.1
+cupy-cuda12x==13.3.0
     # via ray
 cycler==0.12.1
     # via matplotlib
@@ -224,6 +226,7 @@ frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
+    #   ray
 fsspec==2024.9.0
     # via
     #   datasets
@@ -600,18 +603,10 @@ opencv-python-headless==4.11.0.86
 opentelemetry-api==1.35.0
     # via
     #   mlflow-skinny
-    #   opentelemetry-exporter-prometheus
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
-opentelemetry-exporter-prometheus==0.56b0
-    # via ray
-opentelemetry-proto==1.36.0
-    # via ray
 opentelemetry-sdk==1.35.0
-    # via
-    #   mlflow-skinny
-    #   opentelemetry-exporter-prometheus
-    #   ray
+    # via mlflow-skinny
 opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
 packaging==24.2
@@ -702,9 +697,7 @@ pqdm==0.2.0
 pretrainedmodels==0.7.4
     # via segmentation-models-pytorch
 prometheus-client==0.22.0
-    # via
-    #   opentelemetry-exporter-prometheus
-    #   ray
+    # via ray
 propcache==0.2.0
     # via yarl
 proto-plus==1.26.1
@@ -714,7 +707,6 @@ protobuf==5.28.3
     #   google-api-core
     #   googleapis-common-protos
     #   mlflow-skinny
-    #   opentelemetry-proto
     #   proto-plus
     #   ray
     #   tensorboardx
@@ -862,7 +854,7 @@ rasterio==1.4.3
     #   rioxarray
     #   terratorch
     #   torchgeo
-ray==2.48.0
+ray==2.43.0
     # via -r requirements/test.in
 redis==5.2.0
     # via tensorizer
diff --git a/setup.py b/setup.py
index bfa195d4395f0..64cfbb8db962b 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,6 @@ import json
 import logging
 import os
 import re
-import shutil
 import subprocess
 import sys
 from pathlib import Path
@@ -282,69 +281,10 @@ class cmake_build_ext(build_ext):
             self.copy_file(file, dst_file)
 
 
-class precompiled_wheel_utils:
+class repackage_wheel(build_ext):
     """Extracts libraries and other files from an existing wheel."""
 
-    @staticmethod
-    def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
-        import tempfile
-        import zipfile
-
-        temp_dir = None
-        try:
-            if not os.path.isfile(wheel_url_or_path):
-                wheel_filename = wheel_url_or_path.split("/")[-1]
-                temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
-                wheel_path = os.path.join(temp_dir, wheel_filename)
-                print(f"Downloading wheel from {wheel_url_or_path} "
-                      f"to {wheel_path}")
-                from urllib.request import urlretrieve
-                urlretrieve(wheel_url_or_path, filename=wheel_path)
-            else:
-                wheel_path = wheel_url_or_path
-                print(f"Using existing wheel at {wheel_path}")
-
-            package_data_patch = {}
-
-            with zipfile.ZipFile(wheel_path) as wheel:
-                files_to_copy = [
-                    "vllm/_C.abi3.so",
-                    "vllm/_moe_C.abi3.so",
-                    "vllm/_flashmla_C.abi3.so",
-                    "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
-                    "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
-                    "vllm/cumem_allocator.abi3.so",
-                ]
-
-                compiled_regex = re.compile(
-                    r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
-                file_members = list(
-                    filter(lambda x: x.filename in files_to_copy,
-                           wheel.filelist))
-                file_members += list(
-                    filter(lambda x: compiled_regex.match(x.filename),
-                           wheel.filelist))
-
-                for file in file_members:
-                    print(f"[extract] {file.filename}")
-                    target_path = os.path.join(".", file.filename)
-                    os.makedirs(os.path.dirname(target_path), exist_ok=True)
-                    with wheel.open(file.filename) as src, open(
-                            target_path, "wb") as dst:
-                        shutil.copyfileobj(src, dst)
-
-                    pkg = os.path.dirname(file.filename).replace("/", ".")
-                    package_data_patch.setdefault(pkg, []).append(
-                        os.path.basename(file.filename))
-
-            return package_data_patch
-        finally:
-            if temp_dir is not None:
-                print(f"Removing temporary directory {temp_dir}")
-                shutil.rmtree(temp_dir)
-
-    @staticmethod
-    def get_base_commit_in_main_branch() -> str:
+    def get_base_commit_in_main_branch(self) -> str:
         # Force to use the nightly wheel. This is mainly used for CI testing.
         if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
             return "nightly"
@@ -357,10 +297,6 @@ class precompiled_wheel_utils:
             ]).decode("utf-8")
             upstream_main_commit = json.loads(resp_json)["sha"]
 
-            # In Docker build context, .git may be immutable or missing.
-            if envs.VLLM_DOCKER_BUILD_CONTEXT:
-                return upstream_main_commit
-
             # Check if the upstream_main_commit exists in the local repo
             try:
                 subprocess.check_output(
@@ -393,15 +329,92 @@ class precompiled_wheel_utils:
                 "wheel may not be compatible with your dev branch: %s", err)
             return "nightly"
 
+    def run(self) -> None:
+        assert _is_cuda(
+        ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+
+        wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
+        if wheel_location is None:
+            base_commit = self.get_base_commit_in_main_branch()
+            wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+            # Fallback to nightly wheel if latest commit wheel is unavailable,
+            # in this rare case, the nightly release CI hasn't finished on main.
+            if not is_url_available(wheel_location):
+                wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+
+        import zipfile
+
+        if os.path.isfile(wheel_location):
+            wheel_path = wheel_location
+            print(f"Using existing wheel={wheel_path}")
+        else:
+            # Download the wheel from a given URL, assume
+            # the filename is the last part of the URL
+            wheel_filename = wheel_location.split("/")[-1]
+
+            import tempfile
+
+            # create a temporary directory to store the wheel
+            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
+            wheel_path = os.path.join(temp_dir, wheel_filename)
+
+            print(f"Downloading wheel from {wheel_location} to {wheel_path}")
+
+            from urllib.request import urlretrieve
+
+            try:
+                urlretrieve(wheel_location, filename=wheel_path)
+            except Exception as e:
+                from setuptools.errors import SetupError
+
+                raise SetupError(
+                    f"Failed to get vLLM wheel from {wheel_location}") from e
+
+        with zipfile.ZipFile(wheel_path) as wheel:
+            files_to_copy = [
+                "vllm/_C.abi3.so",
+                "vllm/_moe_C.abi3.so",
+                "vllm/_flashmla_C.abi3.so",
+                "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
+                "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
+                "vllm/cumem_allocator.abi3.so",
+                # "vllm/_version.py", # not available in nightly wheels yet
+            ]
+
+            file_members = list(
+                filter(lambda x: x.filename in files_to_copy, wheel.filelist))
+
+            # vllm_flash_attn python code:
+            # Regex from
+            #  `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
+            compiled_regex = re.compile(
+                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
+            file_members += list(
+                filter(lambda x: compiled_regex.match(x.filename),
+                       wheel.filelist))
+
+            for file in file_members:
+                print(f"Extracting and including {file.filename} "
+                      "from existing wheel")
+                package_name = os.path.dirname(file.filename).replace("/", ".")
+                file_name = os.path.basename(file.filename)
+
+                if package_name not in package_data:
+                    package_data[package_name] = []
+
+                wheel.extract(file)
+                if file_name.endswith(".py"):
+                    # python files shouldn't be added to package_data
+                    continue
+
+                package_data[package_name].append(file_name)
+
 
 def _no_device() -> bool:
     return VLLM_TARGET_DEVICE == "empty"
 
 
 def _is_cuda() -> bool:
-    # Allow forced CUDA in Docker/precompiled builds, even without torch.cuda
-    if envs.VLLM_USE_PRECOMPILED and envs.VLLM_DOCKER_BUILD_CONTEXT:
-        return True
     has_cuda = torch.version.cuda is not None
     return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
             and not (_is_neuron() or _is_tpu()))
@@ -626,37 +639,16 @@ package_data = {
     ]
 }
 
-# If using precompiled, extract and patch package_data (in advance of setup)
-if envs.VLLM_USE_PRECOMPILED:
-    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
-    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
-    if wheel_location is not None:
-        wheel_url = wheel_location
-    else:
-        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
-        wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-        from urllib.request import urlopen
-        try:
-            with urlopen(wheel_url) as resp:
-                if resp.status != 200:
-                    wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-        except Exception as e:
-            print(f"[warn] Falling back to nightly wheel: {e}")
-            wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-
-    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
-        wheel_url)
-    for pkg, files in patch.items():
-        package_data.setdefault(pkg, []).extend(files)
-
 if _no_device():
     ext_modules = []
 
-if not ext_modules or envs.VLLM_USE_PRECOMPILED:
-    # Disable build_ext when using precompiled wheel
+if not ext_modules:
     cmdclass = {}
 else:
-    cmdclass = {"build_ext": cmake_build_ext}
+    cmdclass = {
+        "build_ext":
+        repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
+    }
 
 setup(
     # static metadata should rather go in pyproject.toml
diff --git a/vllm/envs.py b/vllm/envs.py
index 19bc9156b2586..7553eccf16ea9 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -68,7 +68,6 @@ if TYPE_CHECKING:
     MAX_JOBS: Optional[str] = None
     NVCC_THREADS: Optional[str] = None
     VLLM_USE_PRECOMPILED: bool = False
-    VLLM_DOCKER_BUILD_CONTEXT: bool = False
     VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False
     VLLM_NO_DEPRECATION_WARNING: bool = False
     VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
@@ -228,14 +227,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
 
     # If set, vllm will use precompiled binaries (*.so)
     "VLLM_USE_PRECOMPILED":
-    lambda: os.environ.get("VLLM_USE_PRECOMPILED", "").strip().lower() in
-    ("1", "true") or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
-
-    # Used to mark that setup.py is running in a Docker build context,
-    # in order to force the use of precompiled binaries.
-    "VLLM_DOCKER_BUILD_CONTEXT":
-    lambda: os.environ.get("VLLM_DOCKER_BUILD_CONTEXT", "").strip().lower() in
-    ("1", "true"),
+    lambda: bool(os.environ.get("VLLM_USE_PRECOMPILED")) or bool(
+        os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
 
     # Whether to force using nightly wheel in python build.
     # This is used for testing the nightly wheel in python build.

From 27a145e8931582fc74c1f46e0e4630c610b96160 Mon Sep 17 00:00:00 2001
From: Roger Wang <hey@rogerw.me>
Date: Fri, 1 Aug 2025 01:35:49 -0700
Subject: [PATCH 115/224] [Doc] Add example for Step3-VL (#22061)

Signed-off-by: Roger Wang <hey@rogerw.me>
---
 examples/offline_inference/vision_language.py | 298 ++++++++++--------
 .../vision_language_multi_image.py            | 215 +++++++------
 2 files changed, 286 insertions(+), 227 deletions(-)

diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
index 0edcd0407747c..a75b8e2b047d8 100644
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -423,32 +423,6 @@ def run_idefics3(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
-# SmolVLM2-2.2B-Instruct
-def run_smolvlm(questions: list[str], modality: str) -> ModelRequestData:
-    assert modality == "image"
-    model_name = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
-
-    engine_args = EngineArgs(
-        model=model_name,
-        max_model_len=8192,
-        max_num_seqs=2,
-        enforce_eager=True,
-        mm_processor_kwargs={
-            "max_image_size": {"longest_edge": 384},
-        },
-        limit_mm_per_prompt={modality: 1},
-    )
-    prompts = [
-        (f"<|im_start|>User:<image>{question}<end_of_utterance>\nAssistant:")
-        for question in questions
-    ]
-
-    return ModelRequestData(
-        engine_args=engine_args,
-        prompts=prompts,
-    )
-
-
 # Intern-S1
 def run_interns1(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "internlm/Intern-S1"
@@ -522,44 +496,6 @@ def run_internvl(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
-# Nemontron_VL
-def run_nemotron_vl(questions: list[str], modality: str) -> ModelRequestData:
-    model_name = "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1"
-
-    engine_args = EngineArgs(
-        model=model_name,
-        trust_remote_code=True,
-        max_model_len=8192,
-        limit_mm_per_prompt={modality: 1},
-    )
-
-    assert modality == "image"
-    placeholder = "<image>"
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    messages = [
-        [{"role": "user", "content": f"{placeholder}\n{question}"}]
-        for question in questions
-    ]
-    prompts = tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-
-    # Stop tokens for InternVL
-    # models variants may have different stop tokens
-    # please refer to the model card for the correct "stop words":
-    # https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/conversation.py
-    stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
-    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
-    stop_token_ids = [token_id for token_id in stop_token_ids if token_id is not None]
-
-    return ModelRequestData(
-        engine_args=engine_args,
-        prompts=prompts,
-        stop_token_ids=stop_token_ids,
-    )
-
-
 # Keye-VL
 def run_keye_vl(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "Kwai-Keye/Keye-VL-8B-Preview"
@@ -615,6 +551,41 @@ def run_kimi_vl(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
+def run_llama4(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+
+    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=8192,
+        max_num_seqs=4,
+        tensor_parallel_size=8,
+        gpu_memory_utilization=0.4,
+        limit_mm_per_prompt={modality: 1},
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    messages = [
+        [
+            {
+                "role": "user",
+                "content": [{"type": "image"}, {"type": "text", "text": f"{question}"}],
+            }
+        ]
+        for question in questions
+    ]
+    prompts = tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True, tokenize=False
+    )
+    stop_token_ids = None
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+        stop_token_ids=stop_token_ids,
+    )
+
+
 # LLaVA-1.5
 def run_llava(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
@@ -857,41 +828,6 @@ def run_mllama(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
-def run_llama4(questions: list[str], modality: str) -> ModelRequestData:
-    assert modality == "image"
-
-    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-
-    engine_args = EngineArgs(
-        model=model_name,
-        max_model_len=8192,
-        max_num_seqs=4,
-        tensor_parallel_size=8,
-        gpu_memory_utilization=0.4,
-        limit_mm_per_prompt={modality: 1},
-    )
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    messages = [
-        [
-            {
-                "role": "user",
-                "content": [{"type": "image"}, {"type": "text", "text": f"{question}"}],
-            }
-        ]
-        for question in questions
-    ]
-    prompts = tokenizer.apply_chat_template(
-        messages, add_generation_prompt=True, tokenize=False
-    )
-    stop_token_ids = None
-    return ModelRequestData(
-        engine_args=engine_args,
-        prompts=prompts,
-        stop_token_ids=stop_token_ids,
-    )
-
-
 # Molmo
 def run_molmo(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
@@ -917,6 +853,44 @@ def run_molmo(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
+# Nemontron_VL
+def run_nemotron_vl(questions: list[str], modality: str) -> ModelRequestData:
+    model_name = "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        trust_remote_code=True,
+        max_model_len=8192,
+        limit_mm_per_prompt={modality: 1},
+    )
+
+    assert modality == "image"
+    placeholder = "<image>"
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    messages = [
+        [{"role": "user", "content": f"{placeholder}\n{question}"}]
+        for question in questions
+    ]
+    prompts = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+
+    # Stop tokens for InternVL
+    # models variants may have different stop tokens
+    # please refer to the model card for the correct "stop words":
+    # https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/conversation.py
+    stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
+    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
+    stop_token_ids = [token_id for token_id in stop_token_ids if token_id is not None]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+        stop_token_ids=stop_token_ids,
+    )
+
+
 # NVLM-D
 def run_nvlm_d(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
@@ -1274,6 +1248,94 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
     )
 
 
+# SkyworkR1V
+def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+
+    model_name = "Skywork/Skywork-R1V-38B"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        trust_remote_code=True,
+        max_model_len=4096,
+        limit_mm_per_prompt={modality: 1},
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    messages = [
+        [{"role": "user", "content": f"<image>\n{question}"}] for question in questions
+    ]
+    prompts = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+
+    # Stop tokens for SkyworkR1V
+    # https://huggingface.co/Skywork/Skywork-R1V-38B/blob/main/conversation.py
+    stop_tokens = ["<｜end▁of▁sentence｜>", "<|endoftext|>"]
+    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+        stop_token_ids=stop_token_ids,
+    )
+
+
+# SmolVLM2-2.2B-Instruct
+def run_smolvlm(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+    model_name = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=8192,
+        max_num_seqs=2,
+        enforce_eager=True,
+        mm_processor_kwargs={
+            "max_image_size": {"longest_edge": 384},
+        },
+        limit_mm_per_prompt={modality: 1},
+    )
+    prompts = [
+        (f"<|im_start|>User:<image>{question}<end_of_utterance>\nAssistant:")
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
+# Step3
+def run_step3(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+
+    model_name = "stepfun-ai/step3-fp8"
+
+    # NOTE: Below are verified configurations for step3-fp8
+    # on 8xH100 GPUs.
+    engine_args = EngineArgs(
+        model=model_name,
+        max_num_batched_tokens=4096,
+        gpu_memory_utilization=0.85,
+        tensor_parallel_size=8,
+        limit_mm_per_prompt={modality: 1},
+        reasoning_parser="step3",
+    )
+
+    prompts = [
+        "<｜begin▁of▁sentence｜> You are a helpful assistant. <|BOT|>user\n "
+        f"<im_patch>{question} <|EOT|><|BOT|>assistant\n<think>\n"
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 # omni-research/Tarsier-7b
 def run_tarsier(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
@@ -1324,39 +1386,6 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
-# SkyworkR1V
-def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData:
-    assert modality == "image"
-
-    model_name = "Skywork/Skywork-R1V-38B"
-
-    engine_args = EngineArgs(
-        model=model_name,
-        trust_remote_code=True,
-        max_model_len=4096,
-        limit_mm_per_prompt={modality: 1},
-    )
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    messages = [
-        [{"role": "user", "content": f"<image>\n{question}"}] for question in questions
-    ]
-    prompts = tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-
-    # Stop tokens for SkyworkR1V
-    # https://huggingface.co/Skywork/Skywork-R1V-38B/blob/main/conversation.py
-    stop_tokens = ["<｜end▁of▁sentence｜>", "<|endoftext|>"]
-    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
-
-    return ModelRequestData(
-        engine_args=engine_args,
-        prompts=prompts,
-        stop_token_ids=stop_token_ids,
-    )
-
-
 model_example_map = {
     "aria": run_aria,
     "aya_vision": run_aya_vision,
@@ -1373,9 +1402,9 @@ model_example_map = {
     "idefics3": run_idefics3,
     "interns1": run_interns1,
     "internvl_chat": run_internvl,
-    "nemotron_vl": run_nemotron_vl,
     "keye_vl": run_keye_vl,
     "kimi_vl": run_kimi_vl,
+    "llama4": run_llama4,
     "llava": run_llava,
     "llava-next": run_llava_next,
     "llava-next-video": run_llava_next_video,
@@ -1385,8 +1414,8 @@ model_example_map = {
     "minicpmv": run_minicpmv,
     "mistral3": run_mistral3,
     "mllama": run_mllama,
-    "llama4": run_llama4,
     "molmo": run_molmo,
+    "nemotron_vl": run_nemotron_vl,
     "NVLM_D": run_nvlm_d,
     "ovis": run_ovis,
     "paligemma": run_paligemma,
@@ -1401,6 +1430,7 @@ model_example_map = {
     "qwen2_5_omni": run_qwen2_5_omni,
     "skywork_chat": run_skyworkr1v,
     "smolvlm": run_smolvlm,
+    "step3": run_step3,
     "tarsier": run_tarsier,
     "tarsier2": run_tarsier2,
 }
diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py
index dd50f3639709e..1ab405fa14f3a 100644
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@@ -197,6 +197,53 @@ def load_h2ovl(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
+def load_hyperclovax_seed_vision(
+    question: str, image_urls: list[str]
+) -> ModelRequestData:
+    model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+    engine_args = EngineArgs(
+        model=model_name,
+        trust_remote_code=True,
+        max_model_len=16384,
+        limit_mm_per_prompt={"image": len(image_urls)},
+    )
+
+    message = {"role": "user", "content": list()}
+    for _image_url in image_urls:
+        message["content"].append(
+            {
+                "type": "image",
+                "image": _image_url,
+                "ocr": "",
+                "lens_keywords": "",
+                "lens_local_keywords": "",
+            }
+        )
+    message["content"].append(
+        {
+            "type": "text",
+            "text": question,
+        }
+    )
+
+    prompt = tokenizer.apply_chat_template(
+        [
+            message,
+        ],
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        stop_token_ids=None,
+        image_data=[fetch_image(url) for url in image_urls],
+    )
+
+
 def load_idefics3(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "HuggingFaceM4/Idefics3-8B-Llama3"
 
@@ -225,34 +272,6 @@ def load_idefics3(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
-def load_smolvlm(question: str, image_urls: list[str]) -> ModelRequestData:
-    model_name = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
-
-    # The configuration below has been confirmed to launch on a single L40 GPU.
-    engine_args = EngineArgs(
-        model=model_name,
-        max_model_len=8192,
-        max_num_seqs=16,
-        enforce_eager=True,
-        limit_mm_per_prompt={"image": len(image_urls)},
-        mm_processor_kwargs={
-            "max_image_size": {"longest_edge": 384},
-        },
-    )
-
-    placeholders = "\n".join(
-        f"Image-{i}: <image>\n" for i, _ in enumerate(image_urls, start=1)
-    )
-    prompt = (
-        f"<|im_start|>User:{placeholders}\n{question}<end_of_utterance>\nAssistant:"  # noqa: E501
-    )
-    return ModelRequestData(
-        engine_args=engine_args,
-        prompt=prompt,
-        image_data=[fetch_image(url) for url in image_urls],
-    )
-
-
 def load_interns1(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "internlm/Intern-S1"
 
@@ -316,49 +335,36 @@ def load_internvl(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
-def load_hyperclovax_seed_vision(
-    question: str, image_urls: list[str]
-) -> ModelRequestData:
-    model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
 
     engine_args = EngineArgs(
         model=model_name,
-        trust_remote_code=True,
-        max_model_len=16384,
+        max_model_len=131072,
+        tensor_parallel_size=8,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
-    message = {"role": "user", "content": list()}
-    for _image_url in image_urls:
-        message["content"].append(
-            {
-                "type": "image",
-                "image": _image_url,
-                "ocr": "",
-                "lens_keywords": "",
-                "lens_local_keywords": "",
-            }
-        )
-    message["content"].append(
+    placeholders = [{"type": "image", "image": url} for url in image_urls]
+    messages = [
         {
-            "type": "text",
-            "text": question,
+            "role": "user",
+            "content": [
+                *placeholders,
+                {"type": "text", "text": question},
+            ],
         }
-    )
+    ]
 
-    prompt = tokenizer.apply_chat_template(
-        [
-            message,
-        ],
-        tokenize=False,
-        add_generation_prompt=True,
+    processor = AutoProcessor.from_pretrained(model_name)
+
+    prompt = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
     )
 
     return ModelRequestData(
         engine_args=engine_args,
         prompt=prompt,
-        stop_token_ids=None,
         image_data=[fetch_image(url) for url in image_urls],
     )
 
@@ -463,40 +469,6 @@ def load_llava_onevision(question: str, image_urls: list[str]) -> ModelRequestDa
     )
 
 
-def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
-    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-
-    engine_args = EngineArgs(
-        model=model_name,
-        max_model_len=131072,
-        tensor_parallel_size=8,
-        limit_mm_per_prompt={"image": len(image_urls)},
-    )
-
-    placeholders = [{"type": "image", "image": url} for url in image_urls]
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                *placeholders,
-                {"type": "text", "text": question},
-            ],
-        }
-    ]
-
-    processor = AutoProcessor.from_pretrained(model_name)
-
-    prompt = processor.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-
-    return ModelRequestData(
-        engine_args=engine_args,
-        prompt=prompt,
-        image_data=[fetch_image(url) for url in image_urls],
-    )
-
-
 def load_keye_vl(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "Kwai-Keye/Keye-VL-8B-Preview"
 
@@ -954,6 +926,62 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
+def load_smolvlm(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
+
+    # The configuration below has been confirmed to launch on a single L40 GPU.
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=8192,
+        max_num_seqs=16,
+        enforce_eager=True,
+        limit_mm_per_prompt={"image": len(image_urls)},
+        mm_processor_kwargs={
+            "max_image_size": {"longest_edge": 384},
+        },
+    )
+
+    placeholders = "\n".join(
+        f"Image-{i}: <image>\n" for i, _ in enumerate(image_urls, start=1)
+    )
+    prompt = (
+        f"<|im_start|>User:{placeholders}\n{question}<end_of_utterance>\nAssistant:"  # noqa: E501
+    )
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=[fetch_image(url) for url in image_urls],
+    )
+
+
+def load_step3(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "stepfun-ai/step3-fp8"
+
+    # NOTE: Below are verified configurations for step3-fp8
+    # on 8xH100 GPUs.
+    engine_args = EngineArgs(
+        model=model_name,
+        max_num_batched_tokens=4096,
+        gpu_memory_utilization=0.85,
+        tensor_parallel_size=8,
+        limit_mm_per_prompt={"image": len(image_urls)},
+        reasoning_parser="step3",
+    )
+
+    prompt = (
+        "<｜begin▁of▁sentence｜> You are a helpful assistant. <|BOT|>user\n "
+        f"{'<im_patch>' * len(image_urls)}{question} <|EOT|><|BOT|"
+        ">assistant\n<think>\n"
+    )
+    image_data = [fetch_image(url) for url in image_urls]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=image_data,
+    )
+
+
 def load_tarsier(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "omni-research/Tarsier-7b"
 
@@ -1006,16 +1034,16 @@ model_example_map = {
     "deepseek_vl_v2": load_deepseek_vl2,
     "gemma3": load_gemma3,
     "h2ovl_chat": load_h2ovl,
+    "hyperclovax_seed_vision": load_hyperclovax_seed_vision,
     "idefics3": load_idefics3,
     "interns1": load_interns1,
     "internvl_chat": load_internvl,
-    "hyperclovax_seed_vision": load_hyperclovax_seed_vision,
     "keye_vl": load_keye_vl,
     "kimi_vl": load_kimi_vl,
+    "llama4": load_llama4,
     "llava": load_llava,
     "llava-next": load_llava_next,
     "llava-onevision": load_llava_onevision,
-    "llama4": load_llama4,
     "mistral3": load_mistral3,
     "mllama": load_mllama,
     "NVLM_D": load_nvlm_d,
@@ -1028,6 +1056,7 @@ model_example_map = {
     "qwen2_vl": load_qwen2_vl,
     "qwen2_5_vl": load_qwen2_5_vl,
     "smolvlm": load_smolvlm,
+    "step3": load_step3,
     "tarsier": load_tarsier,
     "tarsier2": load_tarsier2,
 }

From e6680f9e25a433bcd754181705e72034ce6c470c Mon Sep 17 00:00:00 2001
From: wuhang <wuhang6@huawei.com>
Date: Fri, 1 Aug 2025 17:04:16 +0800
Subject: [PATCH 116/224] [Bugfix] Add log prefix in non-dp mode engine core
 (#21889)

Signed-off-by: wuhang <wuhang6@huawei.com>
---
 vllm/entrypoints/cli/serve.py           | 11 +----
 vllm/entrypoints/openai/api_server.py   | 12 ++----
 vllm/executor/multiproc_worker_utils.py | 42 ++-----------------
 vllm/utils/__init__.py                  | 55 ++++++++++++++++++++++++-
 vllm/v1/engine/core.py                  | 22 ++--------
 vllm/v1/executor/multiproc_executor.py  | 14 +++----
 6 files changed, 75 insertions(+), 81 deletions(-)

diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py
index bdbe71b832f4f..0305354a66e85 100644
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -2,9 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import argparse
-import os
 import signal
-import sys
 from typing import Optional
 
 import uvloop
@@ -18,10 +16,9 @@ from vllm.entrypoints.openai.cli_args import (make_arg_parser,
                                               validate_parsed_serve_args)
 from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG,
                                     show_filtered_argument_or_group_from_help)
-from vllm.executor.multiproc_worker_utils import _add_prefix
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import FlexibleArgumentParser, get_tcp_uri
+from vllm.utils import FlexibleArgumentParser, decorate_logs, get_tcp_uri
 from vllm.v1.engine.core import EngineCoreProc
 from vllm.v1.engine.utils import CoreEngineProcManager, launch_core_engines
 from vllm.v1.executor.abstract import Executor
@@ -229,11 +226,7 @@ def run_api_server_worker_proc(listen_address,
     """Entrypoint for individual API server worker processes."""
 
     # Add process-specific prefix to stdout and stderr.
-    from multiprocessing import current_process
-    process_name = current_process().name
-    pid = os.getpid()
-    _add_prefix(sys.stdout, process_name, pid)
-    _add_prefix(sys.stderr, process_name, pid)
+    decorate_logs()
 
     uvloop.run(
         run_server_worker(listen_address, sock, args, client_config,
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 05d9a69a65f83..26db1357da4d0 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -11,7 +11,6 @@ import multiprocessing
 import os
 import signal
 import socket
-import sys
 import tempfile
 import uuid
 from argparse import Namespace
@@ -95,15 +94,15 @@ from vllm.entrypoints.openai.serving_transcription import (
 from vllm.entrypoints.openai.tool_parsers import ToolParserManager
 from vllm.entrypoints.utils import (cli_env_setup, load_aware_call,
                                     log_non_default_args, with_cancellation)
-from vllm.executor.multiproc_worker_utils import _add_prefix
 from vllm.logger import init_logger
 from vllm.reasoning import ReasoningParserManager
 from vllm.transformers_utils.config import (
     maybe_register_config_serialize_by_value)
 from vllm.transformers_utils.tokenizer import MistralTokenizer
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import (Device, FlexibleArgumentParser, get_open_zmq_ipc_path,
-                        is_valid_ipv6_address, set_process_title, set_ulimit)
+from vllm.utils import (Device, FlexibleArgumentParser, decorate_logs,
+                        get_open_zmq_ipc_path, is_valid_ipv6_address,
+                        set_process_title, set_ulimit)
 from vllm.v1.metrics.prometheus import get_prometheus_registry
 from vllm.version import __version__ as VLLM_VERSION
 
@@ -1808,10 +1807,7 @@ async def run_server(args, **uvicorn_kwargs) -> None:
     """Run a single-worker API server."""
 
     # Add process-specific prefix to stdout and stderr.
-    process_name = "APIServer"
-    pid = os.getpid()
-    _add_prefix(sys.stdout, process_name, pid)
-    _add_prefix(sys.stderr, process_name, pid)
+    decorate_logs("APIServer")
 
     listen_address, sock = setup_server(args)
     await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
diff --git a/vllm/executor/multiproc_worker_utils.py b/vllm/executor/multiproc_worker_utils.py
index a6c172beff7bb..48b3479ed7997 100644
--- a/vllm/executor/multiproc_worker_utils.py
+++ b/vllm/executor/multiproc_worker_utils.py
@@ -3,21 +3,20 @@
 
 import asyncio
 import os
-import sys
 import threading
 import uuid
 from dataclasses import dataclass
 from multiprocessing import Queue
 from multiprocessing.connection import wait
 from multiprocessing.process import BaseProcess
-from typing import (Any, Callable, Dict, Generic, List, Optional, TextIO,
-                    TypeVar, Union)
+from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar, Union
 
 import torch
 
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.utils import _maybe_force_spawn, get_mp_context, run_method
+from vllm.utils import (_maybe_force_spawn, decorate_logs, get_mp_context,
+                        run_method)
 
 logger = init_logger(__name__)
 
@@ -25,10 +24,6 @@ T = TypeVar('T')
 
 _TERMINATE = "TERMINATE"  # sentinel
 
-# ANSI color codes
-CYAN = '\033[1;36m'
-RESET = '\033[0;0m'
-
 JOIN_TIMEOUT_S = 2
 
 
@@ -213,9 +208,7 @@ def _run_worker_process(
 
     # Add process-specific prefix to stdout and stderr
     process_name = get_mp_context().current_process().name
-    pid = os.getpid()
-    _add_prefix(sys.stdout, process_name, pid)
-    _add_prefix(sys.stderr, process_name, pid)
+    decorate_logs(process_name)
 
     # Initialize worker
     worker = worker_factory(vllm_config, rank)
@@ -260,33 +253,6 @@ def _run_worker_process(
     logger.info("Worker exiting")
 
 
-def _add_prefix(file: TextIO, worker_name: str, pid: int) -> None:
-    """Prepend each output line with process-specific prefix"""
-
-    prefix = f"{CYAN}({worker_name} pid={pid}){RESET} "
-    file_write = file.write
-
-    def write_with_prefix(s: str):
-        if not s:
-            return
-        if file.start_new_line:  # type: ignore[attr-defined]
-            file_write(prefix)
-        idx = 0
-        while (next_idx := s.find('\n', idx)) != -1:
-            next_idx += 1
-            file_write(s[idx:next_idx])
-            if next_idx == len(s):
-                file.start_new_line = True  # type: ignore[attr-defined]
-                return
-            file_write(prefix)
-            idx = next_idx
-        file_write(s[idx:])
-        file.start_new_line = False  # type: ignore[attr-defined]
-
-    file.start_new_line = True  # type: ignore[attr-defined]
-    file.write = write_with_prefix  # type: ignore[method-assign]
-
-
 def set_multiprocessing_worker_envs(parallel_config):
     """ Set up environment variables that should be used when there are workers
     in a multiprocessing environment. This should be called by the parent 
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index a7f579b0c9c2d..d5d8d9dad73a8 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -47,7 +47,7 @@ from dataclasses import dataclass, field
 from functools import cache, lru_cache, partial, wraps
 from types import MappingProxyType
 from typing import (TYPE_CHECKING, Any, Callable, Generic, Literal, NamedTuple,
-                    Optional, Tuple, TypeVar, Union, cast, overload)
+                    Optional, TextIO, Tuple, TypeVar, Union, cast, overload)
 from urllib.parse import urlparse
 from uuid import uuid4
 
@@ -167,6 +167,10 @@ GB_bytes = 1_000_000_000
 GiB_bytes = 1 << 30
 """The number of bytes in one gibibyte (GiB)."""
 
+# ANSI color codes
+CYAN = '\033[1;36m'
+RESET = '\033[0;0m'
+
 STR_DTYPE_TO_TORCH_DTYPE = {
     "half": torch.half,
     "bfloat16": torch.bfloat16,
@@ -3258,3 +3262,52 @@ def set_process_title(name: str,
     else:
         name = f"{envs.VLLM_PROCESS_NAME_PREFIX}::{name}"
     setproctitle.setproctitle(name)
+
+
+def _add_prefix(file: TextIO, worker_name: str, pid: int) -> None:
+    """Prepend each output line with process-specific prefix"""
+
+    prefix = f"{CYAN}({worker_name} pid={pid}){RESET} "
+    file_write = file.write
+
+    def write_with_prefix(s: str):
+        if not s:
+            return
+        if file.start_new_line:  # type: ignore[attr-defined]
+            file_write(prefix)
+        idx = 0
+        while (next_idx := s.find('\n', idx)) != -1:
+            next_idx += 1
+            file_write(s[idx:next_idx])
+            if next_idx == len(s):
+                file.start_new_line = True  # type: ignore[attr-defined]
+                return
+            file_write(prefix)
+            idx = next_idx
+        file_write(s[idx:])
+        file.start_new_line = False  # type: ignore[attr-defined]
+
+    file.start_new_line = True  # type: ignore[attr-defined]
+    file.write = write_with_prefix  # type: ignore[method-assign]
+
+
+def decorate_logs(process_name: Optional[str] = None) -> None:
+    """
+    Adds a process-specific prefix to each line of output written to stdout and
+    stderr.
+
+    This function is intended to be called before initializing the api_server,
+    engine_core, or worker classes, so that all subsequent output from the
+    process is prefixed with the process name and PID. This helps distinguish
+    log output from different processes in multi-process environments.
+
+    Args:
+        process_name: Optional; the name of the process to use in the prefix.
+            If not provided, the current process name from the multiprocessing
+            context is used.
+    """
+    if process_name is None:
+        process_name = get_mp_context().current_process().name
+    pid = os.getpid()
+    _add_prefix(sys.stdout, process_name, pid)
+    _add_prefix(sys.stderr, process_name, pid)
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index f9a6315df8af8..6ae5736df98b8 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -3,7 +3,6 @@
 import os
 import queue
 import signal
-import sys
 import threading
 import time
 from collections import deque
@@ -19,15 +18,14 @@ import zmq
 
 from vllm.config import ParallelConfig, VllmConfig
 from vllm.distributed import stateless_destroy_torch_distributed_process_group
-from vllm.executor.multiproc_worker_utils import _add_prefix
 from vllm.logger import init_logger
 from vllm.logging_utils.dump_input import dump_engine_exception
 from vllm.lora.request import LoRARequest
 from vllm.tasks import POOLING_TASKS, SupportedTask
 from vllm.transformers_utils.config import (
     maybe_register_config_serialize_by_value)
-from vllm.utils import (make_zmq_socket, resolve_obj_by_qualname,
-                        set_process_title)
+from vllm.utils import (decorate_logs, make_zmq_socket,
+                        resolve_obj_by_qualname, set_process_title)
 from vllm.v1.core.kv_cache_utils import (get_kv_cache_config,
                                          unify_kv_cache_configs)
 from vllm.v1.core.sched.interface import SchedulerInterface
@@ -649,12 +647,14 @@ class EngineCoreProc(EngineCore):
                 "vllm_config"].parallel_config
             if parallel_config.data_parallel_size > 1 or dp_rank > 0:
                 set_process_title("DPEngineCore", str(dp_rank))
+                decorate_logs()
                 # Set data parallel rank for this engine process.
                 parallel_config.data_parallel_rank = dp_rank
                 parallel_config.data_parallel_rank_local = local_dp_rank
                 engine_core = DPEngineCoreProc(*args, **kwargs)
             else:
                 set_process_title("EngineCore")
+                decorate_logs()
                 engine_core = EngineCoreProc(*args, **kwargs)
 
             engine_core.run_busy_loop()
@@ -905,8 +905,6 @@ class DPEngineCoreProc(EngineCoreProc):
         log_stats: bool,
         client_handshake_address: Optional[str] = None,
     ):
-        self._decorate_logs()
-
         # Counts forward-passes of the model so that we can synchronize
         # finished with DP peers every N steps.
         self.counter = 0
@@ -919,15 +917,6 @@ class DPEngineCoreProc(EngineCoreProc):
                          executor_class, log_stats, client_handshake_address,
                          dp_rank)
 
-    def _decorate_logs(self):
-        # Add process-specific prefix to stdout and stderr before
-        # we initialize the engine.
-        from multiprocessing import current_process
-        process_name = current_process().name
-        pid = os.getpid()
-        _add_prefix(sys.stdout, process_name, pid)
-        _add_prefix(sys.stderr, process_name, pid)
-
     def _init_data_parallel(self, vllm_config: VllmConfig):
 
         # Configure GPUs and stateless process group for data parallel.
@@ -1149,9 +1138,6 @@ class DPEngineCoreActor(DPEngineCoreProc):
                 f"{(local_dp_rank + 1) * world_size}) "
                 f"base value: \"{os.getenv(device_control_env_var)}\"") from e
 
-    def _decorate_logs(self):
-        pass
-
     @contextmanager
     def _perform_handshakes(self, handshake_address: str, identity: bytes,
                             local_client: bool, vllm_config: VllmConfig,
diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
index 8270385053852..d90051c3224fd 100644
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -4,7 +4,6 @@ import multiprocessing
 import os
 import pickle
 import signal
-import sys
 import threading
 import time
 import traceback
@@ -28,10 +27,11 @@ from vllm.distributed.device_communicators.shm_broadcast import (Handle,
                                                                  MessageQueue)
 from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
 from vllm.executor.multiproc_worker_utils import (
-    _add_prefix, set_multiprocessing_worker_envs)
+    set_multiprocessing_worker_envs)
 from vllm.logger import init_logger
-from vllm.utils import (get_distributed_init_method, get_loopback_ip,
-                        get_mp_context, get_open_port, set_process_title)
+from vllm.utils import (decorate_logs, get_distributed_init_method,
+                        get_loopback_ip, get_mp_context, get_open_port,
+                        set_process_title)
 from vllm.v1.executor.abstract import Executor, FailureCallback
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.worker.worker_base import WorkerWrapperBase
@@ -382,11 +382,11 @@ class WorkerProc:
         pp_str = f"PP{rank // tp_size}" if pp_size > 1 else ""
         tp_str = f"TP{rank % tp_size}" if tp_size > 1 else ""
         suffix = f"{pp_str}{'_' if pp_str and tp_str else ''}{tp_str}"
+        process_name = "VllmWorker"
         if suffix:
             set_process_title(suffix, append=True)
-        pid = os.getpid()
-        _add_prefix(sys.stdout, f"VllmWorker rank={rank}", pid)
-        _add_prefix(sys.stderr, f"VllmWorker rank={rank}", pid)
+            process_name = f"{process_name} {suffix}"
+        decorate_logs(process_name)
 
         # Initialize MessageQueue for receiving SchedulerOutput
         self.rpc_broadcast_mq = MessageQueue.create_from_handle(

From 0f81b310db013ec9fbc1deb9de97bd9b2a9af62f Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Fri, 1 Aug 2025 02:11:40 -0700
Subject: [PATCH 117/224] [Misc] Remove upper bound in openai package version
 (#22060)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
---
 requirements/common.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/common.txt b/requirements/common.txt
index d29b3e59d35b2..6b57a3d2f1d0d 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -13,7 +13,7 @@ tokenizers >= 0.21.1  # Required for fast incremental detokenization.
 protobuf # Required by LlamaTokenizer.
 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
 aiohttp
-openai >= 1.87.0, <= 1.90.0 # Ensure modern openai package (ensure ResponsePrompt exists in type.responses and max_completion_tokens field support)
+openai >= 1.87.0 # Ensure modern openai package (ensure ResponsePrompt exists in type.responses and max_completion_tokens field support)
 pydantic >= 2.10
 prometheus_client >= 0.18.0
 pillow  # Required for image processing

From 49314869887e169be080201ab8bcda14e745c080 Mon Sep 17 00:00:00 2001
From: WeiQing Chen <40507679+david6666666@users.noreply.github.com>
Date: Fri, 1 Aug 2025 17:11:56 +0800
Subject: [PATCH 118/224] [Doc] Added warning of speculating with draft model
 (#22047)

Signed-off-by: Dilute-l <dilu2333@163.com>
Co-authored-by: Dilute-l <dilu2333@163.com>
---
 docs/features/spec_decode.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/features/spec_decode.md b/docs/features/spec_decode.md
index be4b91feda7aa..89d5b489e1888 100644
--- a/docs/features/spec_decode.md
+++ b/docs/features/spec_decode.md
@@ -15,6 +15,10 @@ Speculative decoding is a technique which improves inter-token latency in memory
 
 The following code configures vLLM in an offline mode to use speculative decoding with a draft model, speculating 5 tokens at a time.
 
+!!! warning
+    In vLLM v0.10.0, speculative decoding with a draft model is not supported.
+    Running the following code raises a `NotImplementedError`.
+
 ??? code
 
     ```python

From 28b18cc741e596ea6f9981b8365c4819523fc24b Mon Sep 17 00:00:00 2001
From: Jee Jee Li <pandaleefree@gmail.com>
Date: Fri, 1 Aug 2025 19:09:54 +0800
Subject: [PATCH 119/224] [Quantization] Enable BNB support for InternS1
 (#21953)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
---
 .../model_loader/bitsandbytes_loader.py       | 39 ++++++++++++-------
 vllm/model_executor/utils.py                  | 20 +++++++++-
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py
index 68fcb785691c8..f54dfab5238e1 100644
--- a/vllm/model_executor/model_loader/bitsandbytes_loader.py
+++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py
@@ -34,7 +34,8 @@ from vllm.model_executor.model_loader.weight_utils import (
     filter_duplicate_safetensors_files, filter_files_not_needed_for_inference,
     pt_weights_iterator, safetensors_weights_iterator)
 from vllm.model_executor.models import is_pooling_model
-from vllm.model_executor.utils import (get_packed_modules_mapping,
+from vllm.model_executor.utils import (get_moe_expert_mapping,
+                                       get_packed_modules_mapping,
                                        set_weight_attrs)
 from vllm.platforms import current_platform
 
@@ -43,6 +44,12 @@ from vllm.platforms import current_platform
 logger = init_logger(__name__)
 
 
+def is_moe_model(model: torch.nn.Module) -> bool:
+    """Checks if the model contains FusedMoE layers."""
+    return bool(any(
+        isinstance(module, FusedMoE) for module in model.modules()))
+
+
 class BitsAndBytesModelLoader(BaseModelLoader):
     """Model loader to load model weights with BitAndBytes quantization."""
 
@@ -61,6 +68,8 @@ class BitsAndBytesModelLoader(BaseModelLoader):
         # Store all module names (from transformers) that support
         # BNB quantization.
         self.target_modules: list[str] = []
+        # Store the mapping of expert parameters for MoE models.
+        self.expert_params_mapping: list[tuple[str, str, int, str]] = []
         # mapping weight names from transformers to vllm.
         self.weight_mapper: Callable = lambda name: name
         self.pre_quant: bool = False
@@ -413,13 +422,8 @@ class BitsAndBytesModelLoader(BaseModelLoader):
                 # in case model has a mixture of disk-merged and disk-split
                 # weights with same last name.
                 self.target_modules.append(name)
-            elif (isinstance(module, FusedMoE)
-                  and hasattr(module.quant_method, "quant_config")):
-                if not hasattr(model, "get_expert_mapping"):
-                    raise AttributeError(
-                        f"MoE Model {type(model).__name__} does not support "
-                        "BitsAndBytes quantization yet. Ensure this model has "
-                        "'get_expert_mapping' method.")
+            elif isinstance(module, FusedMoE) and hasattr(
+                    module.quant_method, "quant_config"):
                 # TODO: support FusedMoE with prequant and 8bit.
                 if self.pre_quant:
                     raise ValueError(
@@ -430,9 +434,9 @@ class BitsAndBytesModelLoader(BaseModelLoader):
                         "BitsAndBytes 8bit quantization with FusedMoE is not "
                         "supported yet.")
                 # Get the corresponding weight name using module name and
-                # get_expert_mapping.
-                expert_mapping = model.get_expert_mapping()
-                for exp in expert_mapping:
+                # expert_params_mapping.
+
+                for exp in self.expert_params_mapping:
                     weight_name = exp[1]
                     rep_name = name.replace("experts",
                                             "") + weight_name.removesuffix(".")
@@ -464,7 +468,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
             elif isinstance(module, (RowParallelLinear, )):
                 self.column_sharded_weights_modules.append(name)
             elif isinstance(module, FusedMoE):
-                expert_mapping = model.get_expert_mapping()
+                expert_mapping = self.expert_params_mapping
                 for exp in expert_mapping:
                     if exp[-1] == "w2":
                         weight_name = exp[1]
@@ -516,6 +520,13 @@ class BitsAndBytesModelLoader(BaseModelLoader):
         self.is_pool_model = is_pooling_model(model)
         self.modules_mapping = ParamMapping(get_packed_modules_mapping(model))
 
+        if is_moe_model(model):
+            self.expert_params_mapping = get_moe_expert_mapping(model)
+            if not self.expert_params_mapping:
+                raise AttributeError(
+                    f"MoE Model {type(model).__name__} does not support "
+                    "BitsAndBytes quantization yet. Ensure this model has "
+                    "'get_expert_mapping' method.")
         # For some models like Molmo, we need to use hf_to_vllm_mapper
         # to ensure correct loading of weights.
         if hf_to_vllm_mapper := getattr(model, "hf_to_vllm_mapper", None):
@@ -569,10 +580,10 @@ class BitsAndBytesModelLoader(BaseModelLoader):
         """
         from bitsandbytes.functional import QuantState
 
-        if not hasattr(model, "get_expert_mapping"):
+        if not self.expert_params_mapping:
             return dict()
 
-        expert_mapping = model.get_expert_mapping()
+        expert_mapping = self.expert_params_mapping
         expert_qs_dict = {}
         for name, module in model.named_modules():
             if not isinstance(module, FusedMoE):
diff --git a/vllm/model_executor/utils.py b/vllm/model_executor/utils.py
index 2b20ca2a3ba3f..41ed0b09c5a2a 100644
--- a/vllm/model_executor/utils.py
+++ b/vllm/model_executor/utils.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Utils for model executor."""
+
 import copy
 from typing import Any, Optional
 
@@ -9,6 +10,7 @@ import torch
 
 def set_random_seed(seed: int) -> None:
     from vllm.platforms import current_platform
+
     current_platform.seed_everything(seed)
 
 
@@ -29,7 +31,7 @@ def set_weight_attrs(
         return
     for key, value in weight_attrs.items():
         assert not hasattr(
-            weight, key), (f"Overwriting existing tensor attribute: {key}")
+            weight, key), f"Overwriting existing tensor attribute: {key}"
 
         # NOTE(woosuk): During weight loading, we often do something like:
         # narrowed_tensor = param.data.narrow(0, offset, len)
@@ -41,6 +43,7 @@ def set_weight_attrs(
         # we sync the param tensor after its weight loader is called.
         # TODO(woosuk): Remove this hack once we have a better solution.
         from vllm.platforms import current_platform
+
         if current_platform.is_tpu() and key == "weight_loader":
             value = _make_synced_weight_loader(value)
         setattr(weight, key, value)
@@ -77,4 +80,17 @@ def get_packed_modules_mapping(model: torch.nn.Module) -> dict[str, list[str]]:
                 f"safely because of conflicts from {type(child).__name__}.")
         else:
             parent_map.update(child_map)
-    return parent_map
\ No newline at end of file
+    return parent_map
+
+
+def get_moe_expert_mapping(
+    model: torch.nn.Module, ) -> list[tuple[str, str, int, str]]:
+    if parent_map := getattr(model, "get_expert_mapping", None):
+        return parent_map()
+    else:
+        # We only check main components instead of whole model submodules
+        for child in model.children():
+            child_map = getattr(child, "get_expert_mapping", None)
+            if child_map is not None:
+                return child_map()
+        return []

From 87c94bc87943818ad039d5c916df793fbd081e6a Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 1 Aug 2025 13:24:46 +0100
Subject: [PATCH 120/224] Revert "Update sampling_metadata.py (#21937)"
 (#22088)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/model_executor/sampling_metadata.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py
index 66bcf1c4bfe50..56f0f0984bfa0 100644
--- a/vllm/model_executor/sampling_metadata.py
+++ b/vllm/model_executor/sampling_metadata.py
@@ -539,37 +539,37 @@ class SamplingTensors:
         temperatures_t = torch.tensor(
             temperatures,
             device="cpu",
-            dtype=torch.float32,
+            dtype=dtype,
             pin_memory=pin_memory,
         )
         top_ps_t = torch.tensor(
             top_ps,
             device="cpu",
-            dtype=torch.float32,
+            dtype=dtype,
             pin_memory=pin_memory,
         )
         min_ps_t = torch.tensor(
             min_ps,
             device="cpu",
-            dtype=torch.float32,
+            dtype=dtype,
             pin_memory=pin_memory,
         )
         presence_penalties_t = torch.tensor(
             presence_penalties,
             device="cpu",
-            dtype=torch.float32,
+            dtype=dtype,
             pin_memory=pin_memory,
         )
         frequency_penalties_t = torch.tensor(
             frequency_penalties,
             device="cpu",
-            dtype=torch.float32,
+            dtype=dtype,
             pin_memory=pin_memory,
         )
         repetition_penalties_t = torch.tensor(
             repetition_penalties,
             device="cpu",
-            dtype=torch.float32,
+            dtype=dtype,
             pin_memory=pin_memory,
         )
         top_ks_t = torch.tensor(

From dfbc1f88807a1bddb75fc1dd587922567d7c133f Mon Sep 17 00:00:00 2001
From: Dipika Sikka <dipikasikka1@gmail.com>
Date: Fri, 1 Aug 2025 08:25:18 -0400
Subject: [PATCH 121/224] [Speculative Decoding] Add `speculators` config
 support (#21345)

---
 .../speculators/test_eagle3.py                | 16 ++++
 vllm/config.py                                | 20 +++-
 vllm/engine/arg_utils.py                      | 22 ++++-
 vllm/model_executor/models/llama_eagle3.py    | 26 +++++-
 vllm/transformers_utils/config.py             | 32 ++++++-
 vllm/transformers_utils/configs/__init__.py   |  2 +
 .../configs/speculators/__init__.py           |  2 +
 .../configs/speculators/algos.py              | 32 +++++++
 .../configs/speculators/base.py               | 91 +++++++++++++++++++
 9 files changed, 232 insertions(+), 11 deletions(-)
 create mode 100644 tests/speculative_decoding/speculators/test_eagle3.py
 create mode 100644 vllm/transformers_utils/configs/speculators/__init__.py
 create mode 100644 vllm/transformers_utils/configs/speculators/algos.py
 create mode 100644 vllm/transformers_utils/configs/speculators/base.py

diff --git a/tests/speculative_decoding/speculators/test_eagle3.py b/tests/speculative_decoding/speculators/test_eagle3.py
new file mode 100644
index 0000000000000..c58fc8c0dc5f4
--- /dev/null
+++ b/tests/speculative_decoding/speculators/test_eagle3.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+import torch
+
+
+@pytest.mark.parametrize(
+    "model_path",
+    [("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717"),
+     ("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized")])
+def test_llama(vllm_runner, example_prompts, model_path):
+    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
+        vllm_outputs = vllm_model.generate_greedy(example_prompts,
+                                                  max_tokens=20)
+        print(vllm_outputs)
+        assert vllm_outputs
diff --git a/vllm/config.py b/vllm/config.py
index 93daab7d6ae97..2d61552c5dadc 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -39,8 +39,8 @@ from vllm.transformers_utils.config import (
     ConfigFormat, get_config, get_hf_image_processor_config,
     get_hf_text_config, get_pooling_config,
     get_sentence_transformer_tokenizer_config, is_encoder_decoder,
-    try_get_generation_config, try_get_safetensors_metadata,
-    try_get_tokenizer_config, uses_mrope)
+    maybe_override_with_speculators_target_model, try_get_generation_config,
+    try_get_safetensors_metadata, try_get_tokenizer_config, uses_mrope)
 from vllm.transformers_utils.s3_utils import S3Model
 from vllm.transformers_utils.utils import is_s3, maybe_model_redirect
 # yapf conflicts with isort for this block
@@ -535,6 +535,15 @@ class ModelConfig:
                     "affect the random state of the Python process that "
                     "launched vLLM.", self.seed)
 
+        if self.runner != "draft":
+            # If we're not running the draft model, check for speculators config
+            # If speculators config, set model / tokenizer to be target model
+            self.model, self.tokenizer = maybe_override_with_speculators_target_model(  # noqa: E501
+                model=self.model,
+                tokenizer=self.tokenizer,
+                revision=self.revision,
+                trust_remote_code=self.trust_remote_code)
+
         # Keep set served_model_name before maybe_model_redirect(self.model)
         self.served_model_name = get_served_model_name(self.model,
                                                        self.served_model_name)
@@ -606,8 +615,8 @@ class ModelConfig:
                                self.config_format,
                                hf_overrides_kw=hf_overrides_kw,
                                hf_overrides_fn=hf_overrides_fn)
-        self.hf_config = hf_config
 
+        self.hf_config = hf_config
         self.hf_text_config = get_hf_text_config(self.hf_config)
         self.attention_chunk_size = getattr(self.hf_text_config,
                                             "attention_chunk_size", None)
@@ -2980,10 +2989,13 @@ class SpeculativeConfig:
                             "Chunked prefill and EAGLE are not compatible "
                             "when using V0.")
 
+                    from vllm.transformers_utils.configs import (
+                        SpeculatorsConfig)
                     from vllm.transformers_utils.configs.eagle import (
                         EAGLEConfig)
+
                     if isinstance(self.draft_model_config.hf_config,
-                                  EAGLEConfig):
+                                  (EAGLEConfig, SpeculatorsConfig)):
                         pass
                     else:
                         eagle_config = EAGLEConfig(
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 78272d983eaf5..efa077a88270a 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -978,8 +978,28 @@ class EngineArgs:
         provided as a JSON string input via CLI arguments or directly as a
         dictionary from the engine.
         """
+
+        from vllm.transformers_utils.config import get_config
+        from vllm.transformers_utils.configs.speculators.base import (
+            SpeculatorsConfig)
+
         if self.speculative_config is None:
-            return None
+            hf_config = get_config(self.hf_config_path or self.model,
+                                   self.trust_remote_code, self.revision,
+                                   self.code_revision, self.config_format)
+
+            # if loading a SpeculatorsConfig, load the speculative_config
+            # details from the config directly
+            # no user input required / expected
+            if isinstance(hf_config, SpeculatorsConfig):
+                # Create the dict here since the user did not provide one
+                self.speculative_config = {}
+                self.speculative_config[
+                    "num_speculative_tokens"] = hf_config.num_lookahead_tokens
+                self.speculative_config["model"] = self.model
+                self.speculative_config["method"] = hf_config.method
+            else:
+                return None
 
         # Note(Shangming): These parameters are not obtained from the cli arg
         # '--speculative-config' and must be passed in when creating the engine
diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py
index 71275f0d58579..572930c39a846 100644
--- a/vllm/model_executor/models/llama_eagle3.py
+++ b/vllm/model_executor/models/llama_eagle3.py
@@ -51,6 +51,25 @@ class LlamaDecoderLayer(LlamaDecoderLayer):
 
         self.hidden_norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
 
+        if getattr(config, "norm_before_residual", False):
+            self._residual_norm = self._norm_before_residual
+        else:
+            self._residual_norm = self._norm_after_residual
+
+    def _norm_before_residual(
+            self,
+            hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        hidden_states = self.hidden_norm(hidden_states)
+        residual = hidden_states
+        return hidden_states, residual
+
+    def _norm_after_residual(
+            self,
+            hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        residual = hidden_states
+        hidden_states = self.hidden_norm(hidden_states)
+        return hidden_states, residual
+
     def forward(
         self,
         positions: torch.Tensor,
@@ -59,9 +78,10 @@ class LlamaDecoderLayer(LlamaDecoderLayer):
         residual: Optional[torch.Tensor],
     ) -> tuple[torch.Tensor, torch.Tensor]:
 
-        residual = hidden_states
         embeds = self.input_layernorm(embeds)
-        hidden_states = self.hidden_norm(hidden_states)
+
+        hidden_states, residual = self._residual_norm(
+            hidden_states=hidden_states)
 
         hidden_states = torch.cat([embeds, hidden_states], dim=-1)
         # Self Attention
@@ -102,7 +122,7 @@ class LlamaModel(nn.Module):
 
         self.layers = nn.ModuleList([
             LlamaDecoderLayer(
-                self.config,
+                config=self.config,
                 prefix=maybe_prefix(prefix, f"layers.{start_layer_id}"),
             )
         ])
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index fcaa48c1392a3..0e633c2c0b6ae 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -35,8 +35,9 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DeepseekVLV2Config,
                                              MllamaConfig, MLPSpeculatorConfig,
                                              Nemotron_Nano_VL_Config,
                                              NemotronConfig, NVLM_D_Config,
-                                             RWConfig, Step3TextConfig,
-                                             Step3VLConfig, UltravoxConfig)
+                                             RWConfig, SpeculatorsConfig,
+                                             Step3TextConfig, Step3VLConfig,
+                                             UltravoxConfig)
 # yapf: enable
 from vllm.transformers_utils.configs.mistral import adapt_config_dict
 from vllm.transformers_utils.utils import check_gguf_file
@@ -81,6 +82,7 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
     "mlp_speculator": MLPSpeculatorConfig,
     "medusa": MedusaConfig,
     "eagle": EAGLEConfig,
+    "speculators": SpeculatorsConfig,
     "nemotron": NemotronConfig,
     "NVLM_D": NVLM_D_Config,
     "ultravox": UltravoxConfig,
@@ -287,6 +289,27 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
     return config
 
 
+def maybe_override_with_speculators_target_model(
+        model: str,
+        tokenizer: str,
+        trust_remote_code: bool,
+        revision: Optional[str] = None) -> tuple[str, str]:
+    """
+    If running a speculators config, override running model with target model
+    """
+    config_dict, _ = PretrainedConfig.get_config_dict(
+        model,
+        revision=revision,
+        trust_remote_code=trust_remote_code,
+        token=_get_hf_token(),
+    )
+    spec_config = config_dict.get("speculators_config")
+    # Return the target model
+    if spec_config is not None:
+        model = tokenizer = spec_config["verifier"]["name_or_path"]
+    return model, tokenizer
+
+
 def get_config(
     model: Union[str, Path],
     trust_remote_code: bool,
@@ -345,9 +368,12 @@ def get_config(
             token=_get_hf_token(),
             **kwargs,
         )
-
         # Use custom model class if it's in our registry
         model_type = config_dict.get("model_type")
+        if model_type is None:
+            model_type = "speculators" if config_dict.get(
+                "speculators_config") is not None else model_type
+
         if model_type in _CONFIG_REGISTRY:
             config_class = _CONFIG_REGISTRY[model_type]
             config = config_class.from_pretrained(
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 96733da726181..64ace167a5a00 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -24,6 +24,7 @@ from vllm.transformers_utils.configs.nemotron import NemotronConfig
 from vllm.transformers_utils.configs.nemotron_h import NemotronHConfig
 from vllm.transformers_utils.configs.nemotron_vl import Nemotron_Nano_VL_Config
 from vllm.transformers_utils.configs.nvlm_d import NVLM_D_Config
+from vllm.transformers_utils.configs.speculators.base import SpeculatorsConfig
 from vllm.transformers_utils.configs.step3_vl import (Step3TextConfig,
                                                       Step3VisionEncoderConfig,
                                                       Step3VLConfig)
@@ -44,6 +45,7 @@ __all__ = [
     "NemotronHConfig",
     "Nemotron_Nano_VL_Config",
     "NVLM_D_Config",
+    "SpeculatorsConfig",
     "UltravoxConfig",
     "Step3VLConfig",
     "Step3VisionEncoderConfig",
diff --git a/vllm/transformers_utils/configs/speculators/__init__.py b/vllm/transformers_utils/configs/speculators/__init__.py
new file mode 100644
index 0000000000000..208f01a7cb5ee
--- /dev/null
+++ b/vllm/transformers_utils/configs/speculators/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/transformers_utils/configs/speculators/algos.py b/vllm/transformers_utils/configs/speculators/algos.py
new file mode 100644
index 0000000000000..efc87b6bcf26f
--- /dev/null
+++ b/vllm/transformers_utils/configs/speculators/algos.py
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+SUPPORTED_SPECULATORS_TYPES = {}
+
+
+def register_speculator(name):
+
+    def decorator(fn):
+        SUPPORTED_SPECULATORS_TYPES[name] = fn
+        return fn
+
+    return decorator
+
+
+@register_speculator("eagle3")
+def update_eagle3(config_dict: dict, vllm_config: dict) -> None:
+    """
+    Apply Eagle-3 specific configuration transformations.
+    
+    Eagle-3 specific fields:
+    - draft_vocab_size: Size of the draft model's vocabulary
+    - target_hidden_size: Hidden size of the target model
+    - norm_before_residual: Whether to apply norm before residual connection
+    """
+
+    vllm_config["draft_vocab_size"] = config_dict.get("draft_vocab_size")
+    if config_dict.get("target_hidden_size") is not None:
+        vllm_config["target_hidden_size"] = config_dict["target_hidden_size"]
+    vllm_config["norm_before_residual"] = config_dict.get(
+        "norm_before_residual", True)
+    vllm_config["architectures"] = ["Eagle3LlamaForCausalLM"]
diff --git a/vllm/transformers_utils/configs/speculators/base.py b/vllm/transformers_utils/configs/speculators/base.py
new file mode 100644
index 0000000000000..d7c16e180c709
--- /dev/null
+++ b/vllm/transformers_utils/configs/speculators/base.py
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
+from typing import Any, Union
+
+from transformers import PretrainedConfig
+
+from vllm.transformers_utils.configs.speculators.algos import (
+    SUPPORTED_SPECULATORS_TYPES)
+
+__all__ = ["SpeculatorsConfig"]
+
+
+class SpeculatorsConfig(PretrainedConfig):
+    model_type = "speculators"
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        pretrained_model_name_or_path: Union[str, os.PathLike],
+        **kwargs,
+    ) -> "SpeculatorsConfig":
+        """Load speculators Eagle config and convert to vLLM format."""
+        config_dict, _ = cls.get_config_dict(pretrained_model_name_or_path,
+                                             **kwargs)
+
+        speculators_model_type = config_dict.get("speculators_model_type")
+        if speculators_model_type not in SUPPORTED_SPECULATORS_TYPES:
+            raise ValueError(
+                f"Expected one of: {SUPPORTED_SPECULATORS_TYPES}. "
+                "Please ensure you're loading a speculators-format model.")
+
+        # validate fields
+        # TODO: @dsikka - use speculators pydantic model to validate
+        cls.validate_speculators_config(config_dict=config_dict)
+        # Convert from speculators config -> format that can be ingested by vLLM
+        vllm_config = cls.convert_speculators_to_vllm(config_dict=config_dict)
+        # Apply anything specific to the supported algorithm
+        algo_updater = SUPPORTED_SPECULATORS_TYPES[speculators_model_type]
+        algo_updater(config_dict=config_dict, vllm_config=vllm_config)
+        return cls(**vllm_config)
+
+    @classmethod
+    def validate_speculators_config(cls, config_dict: dict[str, Any]) -> None:
+        try:
+            spec_config = config_dict["speculators_config"]
+            methods = spec_config["proposal_methods"]
+            first_method = methods[0]
+            _ = first_method["speculative_tokens"]
+            _ = spec_config["verifier"]["name_or_path"]
+            _ = config_dict["speculators_model_type"]
+        except (KeyError, IndexError, TypeError) as e:
+            raise ValueError("Invalid speculators config structure") from e
+
+        if "transformer_layer_config" not in config_dict:
+            raise ValueError("Must provide transformer_layer_config")
+
+        if not isinstance(config_dict["transformer_layer_config"], dict):
+            raise TypeError(
+                "'transformer_layer_config' must be a dictionary if provided")
+
+    @classmethod
+    def convert_speculators_to_vllm(
+            cls, config_dict: dict[str, Any]) -> dict[str, Any]:
+        """
+        Convert speculators config format to vLLM format.
+        
+        This method handles the translation of field names and structure
+        between speculators and vLLM formats.
+        
+        Returns:
+            Dictionary with vLLM-compatible configuration
+        """
+        # Currently we only support one proposal method
+        spec_config = config_dict["speculators_config"]
+        first_method = spec_config.get("proposal_methods")[0]
+        num_lookahead_tokens = first_method.get("speculative_tokens")
+
+        if num_lookahead_tokens is None:
+            raise ValueError(
+                "Missing 'speculative_tokens' in proposal method. "
+                f"Got: {first_method}")
+
+        # Build base vLLM config
+        vllm_config = {
+            "method": config_dict.get("speculators_model_type"),
+            "num_lookahead_tokens": num_lookahead_tokens,
+            "target_model": spec_config.get("verifier")["name_or_path"]
+        }
+        vllm_config.update(config_dict["transformer_layer_config"])
+        return vllm_config

From 26b5f7bd2a4005dccb797804c93cbce329253003 Mon Sep 17 00:00:00 2001
From: TJian <tunjian.tan@embeddedllm.com>
Date: Fri, 1 Aug 2025 05:25:20 -0700
Subject: [PATCH 122/224] [BUG] [ROCm] Fix import bug on ROCm (#22083)

Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com>
---
 vllm/compilation/pass_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py
index 11e03daced160..54f00d5415216 100644
--- a/vllm/compilation/pass_manager.py
+++ b/vllm/compilation/pass_manager.py
@@ -7,7 +7,7 @@ from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 
-if current_platform.is_cuda_alike():
+if current_platform.is_cuda():
     from .fusion import FusionPass
     from .collective_fusion import AllReduceFusionPass, AsyncTPPass
     from .fusion_attn import AttnFusionPass

From fb0e0d46fc443f08bc2a859b839f0f66c6a7f670 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 1 Aug 2025 13:26:42 +0100
Subject: [PATCH 123/224] Fix `get_kwargs` for case where type hint is
 `list[Union[str, type]]` (#22016)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 tests/engine/test_arg_utils.py |  7 ++++++-
 vllm/engine/arg_utils.py       | 10 ++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py
index 1d1926068d28c..c282bf002304a 100644
--- a/tests/engine/test_arg_utils.py
+++ b/tests/engine/test_arg_utils.py
@@ -5,7 +5,7 @@ import json
 from argparse import ArgumentError
 from contextlib import nullcontext
 from dataclasses import dataclass, field
-from typing import Annotated, Literal, Optional
+from typing import Annotated, Literal, Optional, Union
 
 import pytest
 
@@ -136,6 +136,8 @@ class DummyConfig:
     """List with variable length"""
     list_literal: list[Literal[1, 2]] = field(default_factory=list)
     """List with literal choices"""
+    list_union: list[Union[str, type[object]]] = field(default_factory=list)
+    """List with union type"""
     literal_literal: Literal[Literal[1], Literal[2]] = 1
     """Literal of literals with default 1"""
     json_tip: dict = field(default_factory=dict)
@@ -187,6 +189,9 @@ def test_get_kwargs():
     assert kwargs["list_literal"]["type"] is int
     assert kwargs["list_literal"]["nargs"] == "+"
     assert kwargs["list_literal"]["choices"] == [1, 2]
+    # lists with unions should become str type.
+    # If not, we cannot know which type to use for parsing
+    assert kwargs["list_union"]["type"] is str
     # literals of literals should have merged choices
     assert kwargs["literal_literal"]["choices"] == [1, 2]
     # dict should have json tip in help
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index efa077a88270a..f938f19b90469 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -217,10 +217,12 @@ Additionally, list elements can be passed individually using `+`:
         elif contains_type(type_hints, list):
             type_hint = get_type(type_hints, list)
             types = get_args(type_hint)
-            assert len(types) == 1, (
-                "List type must have exactly one type. Got "
-                f"{type_hint} with types {types}")
-            kwargs[name]["type"] = types[0]
+            list_type = types[0]
+            if get_origin(list_type) is Union:
+                msg = "List type must contain str if it is a Union."
+                assert str in get_args(list_type), msg
+                list_type = str
+            kwargs[name]["type"] = list_type
             kwargs[name]["nargs"] = "+"
         elif contains_type(type_hints, int):
             kwargs[name]["type"] = int

From f81c1bb05504672ddd66905161c6ada549fd4b85 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Fri, 1 Aug 2025 08:28:45 -0400
Subject: [PATCH 124/224] [Bugfix] Check NVIDIA artifactory is accessible
 before using flashinfer cubin kernels (#21893)

---
 vllm/attention/backends/flashinfer.py    | 46 +-------------
 vllm/utils/flashinfer.py                 | 81 +++++++++++++++++++++++-
 vllm/v1/attention/backends/flashinfer.py | 49 +-------------
 vllm/v1/attention/backends/mla/common.py | 16 ++---
 4 files changed, 93 insertions(+), 99 deletions(-)

diff --git a/vllm/attention/backends/flashinfer.py b/vllm/attention/backends/flashinfer.py
index 824ff8cca201a..b3372ce2eca8c 100644
--- a/vllm/attention/backends/flashinfer.py
+++ b/vllm/attention/backends/flashinfer.py
@@ -44,9 +44,9 @@ from vllm.attention.layer import Attention
 from vllm.attention.ops.paged_attn import PagedAttention
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
-from vllm.platforms import current_platform
 from vllm.utils import (async_tensor_h2d, get_kv_cache_torch_dtype,
                         make_tensor_with_pad)
+from vllm.utils.flashinfer import use_trtllm_decode_attention
 
 logger = init_logger(__name__)
 
@@ -56,7 +56,6 @@ if TYPE_CHECKING:
 
 
 class FlashInferBackend(AttentionBackend):
-    cached_sm100a_supported: Optional[bool] = None
 
     @staticmethod
     def get_name() -> str:
@@ -123,47 +122,6 @@ class FlashInferBackend(AttentionBackend):
         else:
             raise ValueError(f"Unrecognized FP8 dtype: {kv_cache_dtype}")
 
-    @staticmethod
-    def use_trtllm_decode_attention(
-        batch_size: int,
-        max_seq_len: int,
-        kv_cache_dtype: str,
-        num_qo_heads: Optional[int],
-        num_kv_heads: Optional[int],
-        attn_head_size: Optional[int],
-    ) -> bool:
-        if FlashInferBackend.cached_sm100a_supported is None:
-            FlashInferBackend.cached_sm100a_supported = (
-                current_platform.has_device_capability(100))
-        if not FlashInferBackend.cached_sm100a_supported:
-            return False
-        # Check if the dimensions are supported by TRTLLM decode attention
-        if (attn_head_size is None or num_qo_heads is None
-                or num_kv_heads is None or num_qo_heads // num_kv_heads > 8
-                or num_qo_heads % num_kv_heads != 0 or attn_head_size != 128):
-            return False
-        env_value = envs.VLLM_USE_TRTLLM_DECODE_ATTENTION
-        if env_value is not None:
-            logger.info_once("VLLM_USE_TRTLLM_DECODE_ATTENTION is set to %s",
-                             env_value)
-            # Environment variable is set - respect it
-            # Making the conditional check for zero because
-            # the path is automatically enabled if the batch size condition
-            # is satisfied.
-            no_use_trtllm = (env_value == "0")
-            if not no_use_trtllm:
-                logger.info_once("Using TRTLLM decode attention.")
-            return not no_use_trtllm
-        else:
-            # Environment variable not set - use auto-detection
-            use_trtllm = (FlashInferBackend.cached_sm100a_supported
-                          and batch_size <= 256 and max_seq_len < 131072
-                          and kv_cache_dtype == "auto")
-            if use_trtllm:
-                logger.warning_once(
-                    "Using TRTLLM decode attention (auto-detected).")
-        return use_trtllm
-
 
 @dataclass
 class PerLayerParameters:
@@ -1156,7 +1114,7 @@ class FlashInferImpl(AttentionImpl):
             assert decode_meta.decode_wrapper._sm_scale == softmax_scale
             # TODO: @pavanimajety Remove this once the switch happens
             # inside flashinfer.
-            if not FlashInferBackend.use_trtllm_decode_attention(
+            if not use_trtllm_decode_attention(
                     num_decode_tokens, attn_metadata.max_decode_seq_len,
                     kv_cache_dtype, attn_metadata.num_qo_heads,
                     attn_metadata.num_kv_heads, attn_metadata.head_dim):
diff --git a/vllm/utils/flashinfer.py b/vllm/utils/flashinfer.py
index 3bfb9808c0a00..29967bc516715 100644
--- a/vllm/utils/flashinfer.py
+++ b/vllm/utils/flashinfer.py
@@ -10,12 +10,25 @@ import contextlib
 import functools
 import importlib
 import importlib.util
-from typing import Any, Callable, NoReturn
+import os
+from typing import Any, Callable, NoReturn, Optional
 
+import requests
+
+import vllm.envs as envs
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
 
 logger = init_logger(__name__)
 
+# This is the storage path for the cubins, it can be replaced
+# with a local path for testing.
+# Referenced from https://github.com/flashinfer-ai/flashinfer/blob/0c9a92c3d9a7e043ab6f3f7b2273269caf6ab044/flashinfer/jit/cubin_loader.py#L35  # noqa: E501
+FLASHINFER_CUBINS_REPOSITORY = os.environ.get(
+    "FLASHINFER_CUBINS_REPOSITORY",
+    "https://edge.urm.nvidia.com/artifactory/sw-kernelinferencelibrary-public-generic-local/",  # noqa: E501
+)
+
 
 @functools.cache
 def has_flashinfer() -> bool:
@@ -108,6 +121,70 @@ def has_flashinfer_cutlass_fused_moe() -> bool:
     return True
 
 
+@functools.cache
+def has_nvidia_artifactory() -> bool:
+    """Return ``True`` if NVIDIA's artifactory is accessible.
+    
+    This checks connectivity to the kernel inference library artifactory
+    which is required for downloading certain cubin kernels like TRTLLM FMHA.
+    """
+    try:
+        # Use a short timeout to avoid blocking for too long
+        response = requests.get(FLASHINFER_CUBINS_REPOSITORY, timeout=5)
+        accessible = response.status_code == 200
+        if accessible:
+            logger.debug_once("NVIDIA artifactory is accessible")
+        else:
+            logger.warning_once(
+                "NVIDIA artifactory returned failed status code: %d",
+                response.status_code)
+        return accessible
+    except Exception as e:
+        logger.warning_once("Failed to connect to NVIDIA artifactory: %s", e)
+        return False
+
+
+def use_trtllm_decode_attention(
+    num_tokens: int,
+    max_seq_len: int,
+    kv_cache_dtype: str,
+    num_qo_heads: Optional[int],
+    num_kv_heads: Optional[int],
+    attn_head_size: Optional[int],
+) -> bool:
+    # Requires SM100 and NVIDIA artifactory to be accessible to download cubins
+    if not (current_platform.is_device_capability(100)
+            and has_nvidia_artifactory()):
+        return False
+
+    # Check if the dimensions are supported by TRTLLM decode attention
+    if (attn_head_size is None or num_qo_heads is None or num_kv_heads is None
+            or num_qo_heads // num_kv_heads > 8
+            or num_qo_heads % num_kv_heads != 0 or attn_head_size != 128):
+        return False
+
+    env_value = envs.VLLM_USE_TRTLLM_DECODE_ATTENTION
+    if env_value is not None:
+        logger.info_once("VLLM_USE_TRTLLM_DECODE_ATTENTION is set to %s",
+                         env_value)
+        # Environment variable is set - respect it
+        # Making the conditional check for zero because
+        # the path is automatically enabled if the batch size condition
+        # is satisfied.
+        no_use_trtllm = (env_value == "0")
+        if not no_use_trtllm:
+            logger.info_once("Using TRTLLM decode attention.")
+        return not no_use_trtllm
+    else:
+        # Environment variable not set - use auto-detection
+        use_trtllm = (num_tokens <= 256 and max_seq_len < 131072
+                      and kv_cache_dtype == "auto")
+        if use_trtllm:
+            logger.warning_once(
+                "Using TRTLLM decode attention (auto-detected).")
+        return use_trtllm
+
+
 __all__ = [
     "has_flashinfer",
     "flashinfer_trtllm_fp8_block_scale_moe",
@@ -117,4 +194,6 @@ __all__ = [
     "autotune",
     "has_flashinfer_moe",
     "has_flashinfer_cutlass_fused_moe",
+    "has_nvidia_artifactory",
+    "use_trtllm_decode_attention",
 ]
diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 27552f0e7c1ef..f8af1d7e41831 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -17,8 +17,8 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionType)
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.platforms import current_platform
 from vllm.utils import cdiv
+from vllm.utils.flashinfer import use_trtllm_decode_attention
 from vllm.v1.attention.backends.flash_attn import use_cascade_attention
 from vllm.v1.attention.backends.utils import (
     AttentionMetadataBuilder, CommonAttentionMetadata, get_kv_cache_layout,
@@ -38,7 +38,6 @@ logger = init_logger(__name__)
 class FlashInferBackend(AttentionBackend):
 
     accept_output_buffer: bool = True
-    cached_sm100a_supported: Optional[bool] = None
 
     @classmethod
     def get_supported_dtypes(cls) -> list[torch.dtype]:
@@ -98,48 +97,6 @@ class FlashInferBackend(AttentionBackend):
             raise ValueError(f"Unknown cache layout format {cache_layout}.")
         return stride_order
 
-    @staticmethod
-    def use_trtllm_decode_attention(
-        batch_size: int,
-        max_seq_len: int,
-        kv_cache_dtype: str,
-        num_qo_heads: int,
-        num_kv_heads: int,
-        attn_head_size: int,
-    ) -> bool:
-        if FlashInferBackend.cached_sm100a_supported is None:
-            FlashInferBackend.cached_sm100a_supported = (
-                current_platform.has_device_capability(100))
-        if not FlashInferBackend.cached_sm100a_supported:
-            return False
-        if (num_qo_heads // num_kv_heads > 8
-                or num_qo_heads % num_kv_heads != 0 or attn_head_size != 128):
-            return False
-        env_value = envs.VLLM_USE_TRTLLM_DECODE_ATTENTION
-        if env_value is not None:
-            logger.info_once("VLLM_USE_TRTLLM_DECODE_ATTENTION is set to %s",
-                             env_value)
-            # Environment variable is set - respect it
-            # Making the conditional check for zero because
-            # the path is automatically enabled if the batch size condition
-            # is satisfied.
-            no_use_trtllm = env_value == "0"
-            if not no_use_trtllm:
-                logger.info_once(
-                    "VLLM_USE_TRTLLM_DECODE_ATTENTION is set to 1, "
-                    "using TRTLLM decode attention.")
-            return not no_use_trtllm
-        else:
-            # Environment variable not set - use auto-detection
-            # Only supports attention head size of 128
-            use_trtllm = (FlashInferBackend.cached_sm100a_supported
-                          and batch_size <= 256 and max_seq_len < 131072
-                          and kv_cache_dtype == "auto")
-            if use_trtllm:
-                logger.warning_once(
-                    "Using TRTLLM decode attention (auto-detected).")
-        return use_trtllm
-
     @staticmethod
     def get_fp8_dtype_for_flashinfer(kv_cache_dtype: str) -> torch.dtype:
         if kv_cache_dtype in ("fp8", "fp8_e4m3"):
@@ -352,7 +309,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
 
             if num_decodes > 0:
                 attn_metadata.decode_wrapper = self._get_decode_wrapper()
-                if not FlashInferBackend.use_trtllm_decode_attention(
+                if not use_trtllm_decode_attention(
                         num_decodes, attn_metadata.max_seq_len,
                         self.cache_config.cache_dtype,
                         attn_metadata.num_qo_heads, attn_metadata.num_kv_heads,
@@ -636,7 +593,7 @@ class FlashInferImpl(AttentionImpl):
             decode_query = query[:num_decode_tokens]
             assert decode_query.shape[0] == num_decode_tokens
             assert decode_wrapper is not None
-            if not FlashInferBackend.use_trtllm_decode_attention(
+            if not use_trtllm_decode_attention(
                     attn_metadata.num_decodes, attn_metadata.max_seq_len,
                     self.kv_cache_dtype, attn_metadata.num_qo_heads,
                     attn_metadata.num_kv_heads, attn_metadata.head_dim):
diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py
index 0095d75217856..d112468f1c91d 100755
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -209,6 +209,7 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
                                                UnquantizedLinearMethod)
 from vllm.platforms import current_platform
 from vllm.utils import cdiv, round_down
+from vllm.utils.flashinfer import has_nvidia_artifactory
 from vllm.v1.attention.backends.utils import (
     AttentionMetadataBuilder, CommonAttentionMetadata,
     get_per_layer_parameters, infer_global_hyperparameters,
@@ -379,17 +380,16 @@ M = TypeVar("M", bound=MLACommonMetadata)
 
 
 def use_flashinfer_prefill() -> bool:
-    if flashinfer_available and not envs.VLLM_USE_CUDNN_PREFILL:
-        # For blackwell default to flashinfer prefill if its available since
-        #  its faster than FA2.
-        return current_platform.has_device_capability(100)
-    return False
+    # For blackwell default to flashinfer prefill if it's available since
+    # it is faster than FA2.
+    return (flashinfer_available and not envs.VLLM_USE_CUDNN_PREFILL
+            and current_platform.is_device_capability(100))
 
 
 def use_cudnn_prefill() -> bool:
-    if flashinfer_available and envs.VLLM_USE_CUDNN_PREFILL:
-        return current_platform.has_device_capability(100)
-    return False
+    return (flashinfer_available and envs.VLLM_USE_CUDNN_PREFILL
+            and current_platform.is_device_capability(100)
+            and has_nvidia_artifactory())
 
 
 # Currently 394MB, this can be tuned based on GEMM sizes used.

From 0a6d305e0f7b63b06c87bb1f7564ae8d148a3311 Mon Sep 17 00:00:00 2001
From: Gamhang <leeebucks@gmail.com>
Date: Fri, 1 Aug 2025 21:07:33 +0800
Subject: [PATCH 125/224] feat(multimodal): Add customizable background color
 for RGBA to RGB conversion (#22052)

Signed-off-by: Jinheng Li <ahengljh@gmail.com>
Co-authored-by: Jinheng Li <ahengljh@gmail.com>
---
 docs/features/multimodal_inputs.md |  44 +++++++++++
 tests/multimodal/test_image.py     | 115 ++++++++++++++++++++++++++++-
 vllm/multimodal/image.py           |  37 ++++++++--
 3 files changed, 190 insertions(+), 6 deletions(-)

diff --git a/docs/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md
index b8677f11a1d3c..cdd32924b5668 100644
--- a/docs/features/multimodal_inputs.md
+++ b/docs/features/multimodal_inputs.md
@@ -172,6 +172,36 @@ Multi-image input can be extended to perform video captioning. We show this with
         print(generated_text)
     ```
 
+#### Custom RGBA Background Color
+
+When loading RGBA images (images with transparency), vLLM converts them to RGB format. By default, transparent pixels are replaced with a white background. You can customize this background color using the `rgba_background_color` parameter in `media_io_kwargs`.
+
+??? code
+
+    ```python
+    from vllm import LLM
+    
+    # Default white background (no configuration needed)
+    llm = LLM(model="llava-hf/llava-1.5-7b-hf")
+    
+    # Custom black background for dark theme
+    llm = LLM(
+        model="llava-hf/llava-1.5-7b-hf",
+        media_io_kwargs={"image": {"rgba_background_color": [0, 0, 0]}}
+    )
+    
+    # Custom brand color background (e.g., blue)
+    llm = LLM(
+        model="llava-hf/llava-1.5-7b-hf", 
+        media_io_kwargs={"image": {"rgba_background_color": [0, 0, 255]}}
+    )
+    ```
+
+!!! note
+    - The `rgba_background_color` accepts RGB values as a list `[R, G, B]` or tuple `(R, G, B)` where each value is 0-255
+    - This setting only affects RGBA images with transparency; RGB images are unchanged
+    - If not specified, the default white background `(255, 255, 255)` is used for backward compatibility
+
 ### Video Inputs
 
 You can pass a list of NumPy arrays directly to the `'video'` field of the multi-modal dictionary
@@ -478,6 +508,20 @@ Full example: <gh-file:examples/online_serving/openai_chat_completion_client_for
     export VLLM_VIDEO_FETCH_TIMEOUT=<timeout>
     ```
 
+#### Custom RGBA Background Color
+
+To use a custom background color for RGBA images, pass the `rgba_background_color` parameter via `--media-io-kwargs`:
+
+```bash
+# Example: Black background for dark theme
+vllm serve llava-hf/llava-1.5-7b-hf \
+  --media-io-kwargs '{"image": {"rgba_background_color": [0, 0, 0]}}'
+
+# Example: Custom gray background
+vllm serve llava-hf/llava-1.5-7b-hf \
+  --media-io-kwargs '{"image": {"rgba_background_color": [128, 128, 128]}}'
+```
+
 ### Audio Inputs
 
 Audio input is supported according to [OpenAI Audio API](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in).
diff --git a/tests/multimodal/test_image.py b/tests/multimodal/test_image.py
index cfd44351a6d1f..271a85f1195ec 100644
--- a/tests/multimodal/test_image.py
+++ b/tests/multimodal/test_image.py
@@ -3,9 +3,10 @@
 from pathlib import Path
 
 import numpy as np
+import pytest
 from PIL import Image, ImageChops
 
-from vllm.multimodal.image import convert_image_mode
+from vllm.multimodal.image import ImageMediaIO, convert_image_mode
 
 ASSETS_DIR = Path(__file__).parent / "assets"
 assert ASSETS_DIR.exists()
@@ -35,3 +36,115 @@ def test_rgba_to_rgb():
                 assert converted_image_numpy[i][j][0] == 255
                 assert converted_image_numpy[i][j][1] == 255
                 assert converted_image_numpy[i][j][2] == 255
+
+
+def test_rgba_to_rgb_custom_background(tmp_path):
+    """Test RGBA to RGB conversion with custom background colors."""
+    # Create a simple RGBA image with transparent and opaque pixels
+    rgba_image = Image.new("RGBA", (10, 10),
+                           (255, 0, 0, 255))  # Red with full opacity
+
+    # Make top-left quadrant transparent
+    for i in range(5):
+        for j in range(5):
+            rgba_image.putpixel((i, j), (0, 0, 0, 0))  # Fully transparent
+
+    # Save the test image to tmp_path
+    test_image_path = tmp_path / "test_rgba.png"
+    rgba_image.save(test_image_path)
+
+    # Test 1: Default white background (backward compatibility)
+    image_io_default = ImageMediaIO()
+    converted_default = image_io_default.load_file(test_image_path)
+    default_numpy = np.array(converted_default)
+
+    # Check transparent pixels are white
+    assert default_numpy[0][0][0] == 255  # R
+    assert default_numpy[0][0][1] == 255  # G
+    assert default_numpy[0][0][2] == 255  # B
+    # Check opaque pixels remain red
+    assert default_numpy[5][5][0] == 255  # R
+    assert default_numpy[5][5][1] == 0  # G
+    assert default_numpy[5][5][2] == 0  # B
+
+    # Test 2: Custom black background via kwargs
+    image_io_black = ImageMediaIO(rgba_background_color=(0, 0, 0))
+    converted_black = image_io_black.load_file(test_image_path)
+    black_numpy = np.array(converted_black)
+
+    # Check transparent pixels are black
+    assert black_numpy[0][0][0] == 0  # R
+    assert black_numpy[0][0][1] == 0  # G
+    assert black_numpy[0][0][2] == 0  # B
+    # Check opaque pixels remain red
+    assert black_numpy[5][5][0] == 255  # R
+    assert black_numpy[5][5][1] == 0  # G
+    assert black_numpy[5][5][2] == 0  # B
+
+    # Test 3: Custom blue background via kwargs (as list)
+    image_io_blue = ImageMediaIO(rgba_background_color=[0, 0, 255])
+    converted_blue = image_io_blue.load_file(test_image_path)
+    blue_numpy = np.array(converted_blue)
+
+    # Check transparent pixels are blue
+    assert blue_numpy[0][0][0] == 0  # R
+    assert blue_numpy[0][0][1] == 0  # G
+    assert blue_numpy[0][0][2] == 255  # B
+
+    # Test 4: Test with load_bytes method
+    with open(test_image_path, 'rb') as f:
+        image_data = f.read()
+
+    image_io_green = ImageMediaIO(rgba_background_color=(0, 255, 0))
+    converted_green = image_io_green.load_bytes(image_data)
+    green_numpy = np.array(converted_green)
+
+    # Check transparent pixels are green
+    assert green_numpy[0][0][0] == 0  # R
+    assert green_numpy[0][0][1] == 255  # G
+    assert green_numpy[0][0][2] == 0  # B
+
+
+def test_rgba_background_color_validation():
+    """Test that invalid rgba_background_color values are properly rejected."""
+
+    # Test invalid types
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color="255,255,255")
+
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=255)
+
+    # Test wrong number of elements
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255, 255))
+
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255, 255, 255, 255))
+
+    # Test non-integer values
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255.0, 255.0, 255.0))
+
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255, "255", 255))
+
+    # Test out of range values
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(256, 255, 255))
+
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255, -1, 255))
+
+    # Test that valid values work
+    ImageMediaIO(rgba_background_color=(0, 0, 0))  # Should not raise
+    ImageMediaIO(rgba_background_color=[255, 255, 255])  # Should not raise
+    ImageMediaIO(rgba_background_color=(128, 128, 128))  # Should not raise
diff --git a/vllm/multimodal/image.py b/vllm/multimodal/image.py
index a0448a80ac7c2..1006c1ce4b241 100644
--- a/vllm/multimodal/image.py
+++ b/vllm/multimodal/image.py
@@ -3,6 +3,7 @@
 
 from io import BytesIO
 from pathlib import Path
+from typing import Union
 
 import pybase64
 import torch
@@ -23,9 +24,10 @@ def rescale_image_size(image: Image.Image,
     return image
 
 
-# TODO: Support customizable background color to fill in.
 def rgba_to_rgb(
-    image: Image.Image, background_color=(255, 255, 255)) -> Image.Image:
+    image: Image.Image,
+    background_color: Union[tuple[int, int, int], list[int]] = (255, 255, 255)
+) -> Image.Image:
     """Convert an RGBA image to RGB with filled background color."""
     assert image.mode == "RGBA"
     converted = Image.new("RGB", image.size, background_color)
@@ -55,10 +57,35 @@ class ImageMediaIO(MediaIO[Image.Image]):
         # for flexible control.
         self.kwargs = kwargs
 
+        # Extract RGBA background color from kwargs if provided
+        # Default to white background for backward compatibility
+        rgba_bg = kwargs.get('rgba_background_color', (255, 255, 255))
+        # Convert list to tuple for consistency
+        if isinstance(rgba_bg, list):
+            rgba_bg = tuple(rgba_bg)
+
+        # Validate rgba_background_color format
+        if not (isinstance(rgba_bg, tuple) and len(rgba_bg) == 3
+                and all(isinstance(c, int) and 0 <= c <= 255
+                        for c in rgba_bg)):
+            raise ValueError(
+                "rgba_background_color must be a list or tuple of 3 integers "
+                "in the range [0, 255].")
+        self.rgba_background_color = rgba_bg
+
+    def _convert_image_mode(self, image: Image.Image) -> Image.Image:
+        """Convert image mode with custom background color."""
+        if image.mode == self.image_mode:
+            return image
+        elif image.mode == "RGBA" and self.image_mode == "RGB":
+            return rgba_to_rgb(image, self.rgba_background_color)
+        else:
+            return convert_image_mode(image, self.image_mode)
+
     def load_bytes(self, data: bytes) -> Image.Image:
         image = Image.open(BytesIO(data))
         image.load()
-        return convert_image_mode(image, self.image_mode)
+        return self._convert_image_mode(image)
 
     def load_base64(self, media_type: str, data: str) -> Image.Image:
         return self.load_bytes(pybase64.b64decode(data, validate=True))
@@ -66,7 +93,7 @@ class ImageMediaIO(MediaIO[Image.Image]):
     def load_file(self, filepath: Path) -> Image.Image:
         image = Image.open(filepath)
         image.load()
-        return convert_image_mode(image, self.image_mode)
+        return self._convert_image_mode(image)
 
     def encode_base64(
         self,
@@ -77,7 +104,7 @@ class ImageMediaIO(MediaIO[Image.Image]):
         image = media
 
         with BytesIO() as buffer:
-            image = convert_image_mode(image, self.image_mode)
+            image = self._convert_image_mode(image)
             image.save(buffer, image_format)
             data = buffer.getvalue()
 

From 5c54d9759d3e12d66919826bf1b7c196914d3a92 Mon Sep 17 00:00:00 2001
From: Abirdcfly <fp544037857@gmail.com>
Date: Fri, 1 Aug 2025 21:08:45 +0800
Subject: [PATCH 126/224] [Bugfix][PD] set max_completion_tokens=1 if req has
 this value (#21841)

Signed-off-by: Abirdcfly <fp544037857@gmail.com>
---
 .../online_serving/disaggregated_serving/disagg_proxy_demo.py   | 2 ++
 .../disagg_proxy_p2p_nccl_xpyd.py                               | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
index 16c32dcaa5d31..d39edb0b9d15c 100644
--- a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
+++ b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
@@ -293,6 +293,8 @@ class Proxy:
             # add params to request
             kv_prepare_request = request.copy()
             kv_prepare_request["max_tokens"] = 1
+            if "max_completion_tokens" in kv_prepare_request:
+                kv_prepare_request["max_completion_tokens"] = 1
 
             # prefill stage
             prefill_instance = self.schedule(self.prefill_cycler)
diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
index a6fd92feb2f11..73da7af85f1d9 100644
--- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
+++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
@@ -128,6 +128,8 @@ async def handle_request():
         prefill_request = original_request_data.copy()
         # change max_tokens = 1 to let it only do prefill
         prefill_request["max_tokens"] = 1
+        if "max_completion_tokens" in prefill_request:
+            prefill_request["max_completion_tokens"] = 1
 
         global count
         global prefill_instances

From a59cd9d9f7fd89e19beeffb7e7f89437d413eafb Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Fri, 1 Aug 2025 09:10:30 -0400
Subject: [PATCH 127/224] [Refactor] Fix Compile Warning #1444-D (#21462)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 csrc/moe/topk_softmax_kernels.cu | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/csrc/moe/topk_softmax_kernels.cu b/csrc/moe/topk_softmax_kernels.cu
index 0b505d2e04a21..7a7865b901de1 100644
--- a/csrc/moe/topk_softmax_kernels.cu
+++ b/csrc/moe/topk_softmax_kernels.cu
@@ -24,9 +24,12 @@
 #ifndef USE_ROCM
     #include <cub/util_type.cuh>
     #include <cub/cub.cuh>
+    #include <cuda/std/functional>
+    using AddOp = cuda::std::plus<float>;
 #else
     #include <hipcub/util_type.hpp>
     #include <hipcub/hipcub.hpp>
+    using AddOp = cub::Sum; 
 #endif
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -62,7 +65,6 @@ __launch_bounds__(TPB) __global__
 
     const int thread_row_offset = blockIdx.x * num_cols;
 
-    cub::Sum sum;
     float threadData(-FLT_MAX);
 
     // Don't touch finished rows.
@@ -92,7 +94,7 @@ __launch_bounds__(TPB) __global__
         threadData += exp((static_cast<float>(input[idx]) - float_max));
     }
 
-    const auto Z = BlockReduce(tmpStorage).Reduce(threadData, sum);
+    const auto Z = BlockReduce(tmpStorage).Reduce(threadData, AddOp());
 
     if (threadIdx.x == 0)
     {

From 8026a335a135af2e53c7d89652863312d7a3c936 Mon Sep 17 00:00:00 2001
From: Richard Zou <zou3519@users.noreply.github.com>
Date: Fri, 1 Aug 2025 10:11:29 -0400
Subject: [PATCH 128/224] [BugFix] Update AttnFusionPass cache key (#21947)

Signed-off-by: Richard Zou <zou3519@gmail.com>
---
 vllm/compilation/fusion_attn.py   | 3 +++
 vllm/compilation/inductor_pass.py | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/compilation/fusion_attn.py b/vllm/compilation/fusion_attn.py
index 79518b6f4f965..a40a8caf34a88 100644
--- a/vllm/compilation/fusion_attn.py
+++ b/vllm/compilation/fusion_attn.py
@@ -164,3 +164,6 @@ class AttnFusionPass(VllmInductorPass):
         logger.debug("Fused quantization onto %s attention nodes", count)
         self.dump_graph(graph, "after_attn_fusion")
         self.end_and_log()
+
+    def uuid(self):
+        return VllmInductorPass.hash_source(self, AttentionStaticQuantPattern)
diff --git a/vllm/compilation/inductor_pass.py b/vllm/compilation/inductor_pass.py
index 810d0801e9f38..2a149c65b3877 100644
--- a/vllm/compilation/inductor_pass.py
+++ b/vllm/compilation/inductor_pass.py
@@ -76,9 +76,10 @@ class InductorPass(CustomGraphPass):
         for src in srcs:
             if isinstance(src, str):
                 src_str = src
-            elif isinstance(src, types.FunctionType):
+            elif isinstance(src, (types.FunctionType, type)):
                 src_str = inspect.getsource(src)
             else:
+                # object instance
                 src_str = inspect.getsource(src.__class__)
             hasher.update(src_str.encode("utf-8"))
         return hasher.hexdigest()

From 3146519add735bc51a6a983af9e9c4a8b8d3373e Mon Sep 17 00:00:00 2001
From: Nick Hill <nhill@redhat.com>
Date: Fri, 1 Aug 2025 15:37:55 +0100
Subject: [PATCH 129/224] [BugFix] Don't change title of top-level process
 (#22032)

Signed-off-by: Nick Hill <nhill@redhat.com>
---
 vllm/entrypoints/cli/serve.py         | 11 ++++++-----
 vllm/entrypoints/openai/api_server.py |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py
index 0305354a66e85..9762a1de9edd3 100644
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -18,7 +18,8 @@ from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG,
                                     show_filtered_argument_or_group_from_help)
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import FlexibleArgumentParser, decorate_logs, get_tcp_uri
+from vllm.utils import (FlexibleArgumentParser, decorate_logs, get_tcp_uri,
+                        set_process_title)
 from vllm.v1.engine.core import EngineCoreProc
 from vllm.v1.engine.utils import CoreEngineProcManager, launch_core_engines
 from vllm.v1.executor.abstract import Executor
@@ -74,7 +75,7 @@ def run_headless(args: argparse.Namespace):
 
     if args.api_server_count > 1:
         raise ValueError("api_server_count can't be set in headless mode")
-    # set_process_title("Headless_ProcManager")
+
     # Create the EngineConfig.
     engine_args = vllm.AsyncEngineArgs.from_cli_args(args)
     usage_context = UsageContext.OPENAI_API_SERVER
@@ -139,8 +140,6 @@ def run_multi_api_server(args: argparse.Namespace):
 
     orig_disable_mm_preprocessor_cache = args.disable_mm_preprocessor_cache
 
-    # set_process_title("ProcManager")
-
     if num_api_servers > 1:
         setup_multiprocess_prometheus()
 
@@ -225,7 +224,9 @@ def run_api_server_worker_proc(listen_address,
                                **uvicorn_kwargs) -> None:
     """Entrypoint for individual API server worker processes."""
 
-    # Add process-specific prefix to stdout and stderr.
+    # Set process title and add process-specific prefix to stdout and stderr.
+    server_index = client_config.get("client_index", 0) if client_config else 0
+    set_process_title("APIServer", str(server_index))
     decorate_logs()
 
     uvloop.run(
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 26db1357da4d0..1be03c57a1f1b 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -102,7 +102,7 @@ from vllm.transformers_utils.tokenizer import MistralTokenizer
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import (Device, FlexibleArgumentParser, decorate_logs,
                         get_open_zmq_ipc_path, is_valid_ipv6_address,
-                        set_process_title, set_ulimit)
+                        set_ulimit)
 from vllm.v1.metrics.prometheus import get_prometheus_registry
 from vllm.version import __version__ as VLLM_VERSION
 
@@ -1824,7 +1824,7 @@ async def run_server_worker(listen_address,
         ToolParserManager.import_tool_parser(args.tool_parser_plugin)
 
     server_index = client_config.get("client_index", 0) if client_config else 0
-    set_process_title("APIServer", str(server_index))
+
     # Load logging config for uvicorn if specified
     log_config = load_log_config(args.log_config_file)
     if log_config is not None:

From 97608dc276c292d9217eb6d334d969c5e89913c6 Mon Sep 17 00:00:00 2001
From: David Xia <david@davidxia.com>
Date: Fri, 1 Aug 2025 10:55:55 -0400
Subject: [PATCH 130/224] [Docs] use `uv` in CPU installation docs (#22089)

Signed-off-by: David Xia <david@davidxia.com>
---
 .../installation/cpu/apple.inc.md             | 12 ++---
 .../installation/cpu/build.inc.md             | 22 +++++----
 .../installation/cpu/s390x.inc.md             | 45 ++++++++++++-------
 3 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/docs/getting_started/installation/cpu/apple.inc.md b/docs/getting_started/installation/cpu/apple.inc.md
index 0816f38ac68a1..2828173a76a9a 100644
--- a/docs/getting_started/installation/cpu/apple.inc.md
+++ b/docs/getting_started/installation/cpu/apple.inc.md
@@ -1,6 +1,6 @@
 # --8<-- [start:installation]
 
-vLLM has experimental support for macOS with Apple silicon. For now, users shall build from the source vLLM to natively run on macOS.
+vLLM has experimental support for macOS with Apple silicon. For now, users must build from source to natively run on macOS.
 
 Currently the CPU implementation for macOS supports FP32 and FP16 datatypes.
 
@@ -23,20 +23,20 @@ Currently the CPU implementation for macOS supports FP32 and FP16 datatypes.
 # --8<-- [end:pre-built-wheels]
 # --8<-- [start:build-wheel-from-source]
 
-After installation of XCode and the Command Line Tools, which include Apple Clang, execute the following commands to build and install vLLM from the source.
+After installation of Xcode and the Command Line Tools, which include Apple Clang, execute the following commands to build and install vLLM from source.
 
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-pip install -r requirements/cpu.txt
-pip install -e .
+uv pip install -r requirements/cpu.txt
+uv pip install -e .
 ```
 
 !!! note
-    On macOS the `VLLM_TARGET_DEVICE` is automatically set to `cpu`, which currently is the only supported device.
+    On macOS the `VLLM_TARGET_DEVICE` is automatically set to `cpu`, which is currently the only supported device.
 
 !!! example "Troubleshooting"
-    If the build has error like the following snippet where standard C++ headers cannot be found, try to remove and reinstall your
+    If the build fails with errors like the following where standard C++ headers cannot be found, try to remove and reinstall your
     [Command Line Tools for Xcode](https://developer.apple.com/download/all/).
 
     ```text
diff --git a/docs/getting_started/installation/cpu/build.inc.md b/docs/getting_started/installation/cpu/build.inc.md
index fa777fe0c8a1a..57a09e674a821 100644
--- a/docs/getting_started/installation/cpu/build.inc.md
+++ b/docs/getting_started/installation/cpu/build.inc.md
@@ -1,4 +1,4 @@
-First, install recommended compiler. We recommend to use `gcc/g++ >= 12.3.0` as the default compiler to avoid potential problems. For example, on Ubuntu 22.4, you can run:
+First, install the recommended compiler. We recommend using `gcc/g++ >= 12.3.0` as the default compiler to avoid potential problems. For example, on Ubuntu 22.04, you can run:
 
 ```bash
 sudo apt-get update  -y
@@ -6,28 +6,34 @@ sudo apt-get install -y --no-install-recommends ccache git curl wget ca-certific
 sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
 ```
 
-Second, clone vLLM project:
+Second, clone the vLLM project:
 
 ```bash
 git clone https://github.com/vllm-project/vllm.git vllm_source
 cd vllm_source
 ```
 
-Third, install Python packages for vLLM CPU backend building:
+Third, install required dependencies:
 
 ```bash
-pip install --upgrade pip
-pip install -v -r requirements/cpu-build.txt --extra-index-url https://download.pytorch.org/whl/cpu
-pip install -v -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+uv pip install -r requirements/cpu-build.txt --torch-backend auto
+uv pip install -r requirements/cpu.txt --torch-backend auto
 ```
 
-Finally, build and install vLLM CPU backend:
+??? console "pip"
+    ```bash
+    pip install --upgrade pip
+    pip install -v -r requirements/cpu-build.txt --extra-index-url https://download.pytorch.org/whl/cpu
+    pip install -v -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+    ```
+
+Finally, build and install vLLM:
 
 ```bash
 VLLM_TARGET_DEVICE=cpu python setup.py install
 ```
 
-If you want to develop vllm, install it in editable mode instead.
+If you want to develop vLLM, install it in editable mode instead.
 
 ```bash
 VLLM_TARGET_DEVICE=cpu python setup.py develop
diff --git a/docs/getting_started/installation/cpu/s390x.inc.md b/docs/getting_started/installation/cpu/s390x.inc.md
index acfb3396896bf..c1917267ce91b 100644
--- a/docs/getting_started/installation/cpu/s390x.inc.md
+++ b/docs/getting_started/installation/cpu/s390x.inc.md
@@ -1,6 +1,6 @@
 # --8<-- [start:installation]
 
-vLLM has experimental support for s390x architecture on IBM Z platform. For now, users shall build from the vLLM source to natively run on IBM Z platform.
+vLLM has experimental support for the s390x architecture on the IBM Z platform. For now, users must build from source to run natively on the IBM Z platform.
 
 Currently the CPU implementation for s390x architecture supports FP32 datatype only.
 
@@ -40,21 +40,32 @@ curl https://sh.rustup.rs -sSf | sh -s -- -y && \
     . "$HOME/.cargo/env"
 ```
 
-Execute the following commands to build and install vLLM from the source.
+Execute the following commands to build and install vLLM from source.
 
 !!! tip
-    Please build the following dependencies, `torchvision`, `pyarrow` from the source before building vLLM.
+    Please build the following dependencies from source before building vLLM: `torchvision` and `pyarrow`.
 
 ```bash
     sed -i '/^torch/d' requirements-build.txt    # remove torch from requirements-build.txt since we use nightly builds
-    pip install -v \
-        --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
+    uv pip install -v \
+        --torch-backend auto \
         -r requirements-build.txt \
         -r requirements-cpu.txt \
     VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
-    pip install dist/*.whl
+        uv pip install dist/*.whl
 ```
 
+??? console "pip"
+    ```bash
+        sed -i '/^torch/d' requirements-build.txt    # remove torch from requirements-build.txt since we use nightly builds
+        pip install -v \
+            --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
+            -r requirements-build.txt \
+            -r requirements-cpu.txt && \
+        VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
+            pip install dist/*.whl
+    ```
+
 # --8<-- [end:build-wheel-from-source]
 # --8<-- [start:pre-built-images]
 
@@ -63,19 +74,19 @@ Execute the following commands to build and install vLLM from the source.
 
 ```bash
 docker build -f docker/Dockerfile.s390x \
-        --tag vllm-cpu-env .
+    --tag vllm-cpu-env .
 
-# Launching OpenAI server
+# Launch OpenAI server
 docker run --rm \
-            --privileged=true \
-            --shm-size=4g \
-            -p 8000:8000 \
-            -e VLLM_CPU_KVCACHE_SPACE=<KV cache space> \
-            -e VLLM_CPU_OMP_THREADS_BIND=<CPU cores for inference> \
-            vllm-cpu-env \
-            --model=meta-llama/Llama-3.2-1B-Instruct \
-            --dtype=float \
-            other vLLM OpenAI server arguments
+    --privileged true \
+    --shm-size 4g \
+    -p 8000:8000 \
+    -e VLLM_CPU_KVCACHE_SPACE=<KV cache space> \
+    -e VLLM_CPU_OMP_THREADS_BIND=<CPU cores for inference> \
+    vllm-cpu-env \
+    --model meta-llama/Llama-3.2-1B-Instruct \
+    --dtype float \
+    other vLLM OpenAI server arguments
 ```
 
 # --8<-- [end:build-image-from-source]

From 2d7b09b998980b9ccbb3708632b47bc28de076aa Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 1 Aug 2025 17:16:37 +0100
Subject: [PATCH 131/224] Deprecate `--disable-log-requests` and replace with
 `--enable-log-requests` (#21739)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .buildkite/nightly-benchmarks/README.md       |  1 -
 .../tests/genai-perf-tests.json               |  1 -
 .../tests/nightly-tests.json                  |  6 ----
 .../tests/serving-tests-cpu-snc2.json         |  6 ----
 .../tests/serving-tests-cpu-snc3.json         |  6 ----
 .../tests/serving-tests-cpu.json              |  5 ----
 .../tests/serving-tests.json                  |  6 +---
 tests/config/test_mp_reducer.py               |  1 -
 tests/mq_llm_engine/test_load.py              |  2 +-
 tests/v1/engine/test_async_llm.py             |  4 +--
 tests/v1/test_async_llm_dp.py                 |  1 -
 vllm/engine/arg_utils.py                      | 30 ++++++++++++++++---
 vllm/engine/async_llm_engine.py               | 26 +++++++++-------
 vllm/engine/multiprocessing/engine.py         | 27 ++++++++++++-----
 vllm/entrypoints/openai/api_server.py         | 12 ++++----
 vllm/entrypoints/openai/run_batch.py          |  6 ++--
 vllm/utils/__init__.py                        |  6 ++++
 vllm/v1/engine/async_llm.py                   | 30 +++++++++++--------
 18 files changed, 97 insertions(+), 79 deletions(-)

diff --git a/.buildkite/nightly-benchmarks/README.md b/.buildkite/nightly-benchmarks/README.md
index fcde284efea98..3721d3d1d6749 100644
--- a/.buildkite/nightly-benchmarks/README.md
+++ b/.buildkite/nightly-benchmarks/README.md
@@ -104,7 +104,6 @@ We test the throughput by using `vllm bench serve` with request rate = inf to co
             "tensor_parallel_size": 1,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
diff --git a/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json b/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
index edbe9f2df0ce0..f26ae7634f3d9 100644
--- a/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
@@ -11,7 +11,6 @@
         },
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
diff --git a/.buildkite/nightly-benchmarks/tests/nightly-tests.json b/.buildkite/nightly-benchmarks/tests/nightly-tests.json
index fda1a7a3ec53c..41b4a4008801d 100644
--- a/.buildkite/nightly-benchmarks/tests/nightly-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/nightly-tests.json
@@ -35,7 +35,6 @@
         }, 
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -90,7 +89,6 @@
         }, 
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -145,7 +143,6 @@
         }, 
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -197,7 +194,6 @@
         }, 
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -251,7 +247,6 @@
         }, 
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
@@ -305,7 +300,6 @@
         }, 
         "vllm_server_parameters": {
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "gpu_memory_utilization": 0.9,
             "num_scheduler_steps": 10,
             "max_num_seqs": 512,
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
index a144b4420fbf1..dd0e24edff98d 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
@@ -17,7 +17,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -50,7 +49,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -83,7 +81,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -117,7 +114,6 @@
 	    "trust_remote_code": "",
 	    "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -153,7 +149,6 @@
 	    "trust_remote_code": "",
 	    "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -189,7 +184,6 @@
 	    "trust_remote_code": "",
 	    "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
index e6e69b63b74df..f1bda65a7590b 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
@@ -17,7 +17,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -50,7 +49,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -84,7 +82,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -118,7 +115,6 @@
 	    "trust_remote_code": "",
 	    "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -154,7 +150,6 @@
 	    "trust_remote_code": "",
 	    "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -191,7 +186,6 @@
 	    "trust_remote_code": "",
 	    "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
index ce1f924de387f..f150b9abeea45 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
@@ -17,7 +17,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -50,7 +49,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -83,7 +81,6 @@
 	    "block_size": 128,
 	    "trust_remote_code": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -117,7 +114,6 @@
 	    "trust_remote_code": "",
 	    "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
@@ -153,7 +149,6 @@
 	    "trust_remote_code": "",
 	    "enable_chunked_prefill": "",
             "disable_log_stats": "",
-            "disable_log_requests": "",
 	    "enforce_eager": "",
 	    "max_num_batched_tokens": 2048,
 	    "max_num_seqs": 256,
diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests.json b/.buildkite/nightly-benchmarks/tests/serving-tests.json
index 13fd5aa8db97b..a6d4141d5c2dc 100644
--- a/.buildkite/nightly-benchmarks/tests/serving-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/serving-tests.json
@@ -7,7 +7,6 @@
             "tensor_parallel_size": 1,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -26,7 +25,6 @@
             "tensor_parallel_size": 4,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -45,7 +43,6 @@
             "tensor_parallel_size": 2,
             "swap_space": 16,
             "disable_log_stats": "",
-            "disable_log_requests": "",
             "load_format": "dummy"
         },
         "client_parameters": {
@@ -60,8 +57,7 @@
         "test_name": "serving_llama70B_tp4_sharegpt_specdecode",
         "qps_list": [2],
         "server_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-            "disable_log_requests": "", 
+            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct", 
             "tensor_parallel_size": 4,
             "swap_space": 16,
             "speculative_config": {
diff --git a/tests/config/test_mp_reducer.py b/tests/config/test_mp_reducer.py
index ee351cbfa7c16..d4d4be293280b 100644
--- a/tests/config/test_mp_reducer.py
+++ b/tests/config/test_mp_reducer.py
@@ -28,7 +28,6 @@ def test_mp_reducer(monkeypatch):
             max_model_len=32,
             gpu_memory_utilization=0.1,
             disable_log_stats=True,
-            disable_log_requests=True,
         )
 
         async_llm = AsyncLLM.from_engine_args(
diff --git a/tests/mq_llm_engine/test_load.py b/tests/mq_llm_engine/test_load.py
index e9fd5b814f285..c934706611ae3 100644
--- a/tests/mq_llm_engine/test_load.py
+++ b/tests/mq_llm_engine/test_load.py
@@ -16,7 +16,7 @@ NUM_EXPECTED_TOKENS = 10
 NUM_REQUESTS = 10000
 
 # Scenarios to test for num generated token.
-ENGINE_ARGS = AsyncEngineArgs(model=MODEL, disable_log_requests=True)
+ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
 
 
 @pytest.fixture(scope="function")
diff --git a/tests/v1/engine/test_async_llm.py b/tests/v1/engine/test_async_llm.py
index 412df3acff126..21694491dd73a 100644
--- a/tests/v1/engine/test_async_llm.py
+++ b/tests/v1/engine/test_async_llm.py
@@ -26,12 +26,10 @@ if not current_platform.is_cuda():
 TEXT_ENGINE_ARGS = AsyncEngineArgs(
     model="meta-llama/Llama-3.2-1B-Instruct",
     enforce_eager=True,
-    disable_log_requests=True,
 )
 
 VISION_ENGINE_ARGS = AsyncEngineArgs(model="Qwen/Qwen2-VL-2B-Instruct",
-                                     enforce_eager=True,
-                                     disable_log_requests=True)
+                                     enforce_eager=True)
 
 TEXT_PROMPT = "Hello my name is Robert and"
 
diff --git a/tests/v1/test_async_llm_dp.py b/tests/v1/test_async_llm_dp.py
index 6716d27f571f9..c2610a87ac780 100644
--- a/tests/v1/test_async_llm_dp.py
+++ b/tests/v1/test_async_llm_dp.py
@@ -25,7 +25,6 @@ DP_SIZE = int(os.getenv("DP_SIZE", 2))
 engine_args = AsyncEngineArgs(
     model="ibm-research/PowerMoE-3b",
     enforce_eager=True,
-    disable_log_requests=True,
     tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
     data_parallel_size=DP_SIZE,
 )
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index f938f19b90469..0d38b5b5302c1 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -18,7 +18,7 @@ from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
 import regex as re
 import torch
 from pydantic import TypeAdapter, ValidationError
-from typing_extensions import TypeIs
+from typing_extensions import TypeIs, deprecated
 
 import vllm.envs as envs
 from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
@@ -1704,7 +1704,23 @@ class EngineArgs:
 @dataclass
 class AsyncEngineArgs(EngineArgs):
     """Arguments for asynchronous vLLM engine."""
-    disable_log_requests: bool = False
+    enable_log_requests: bool = False
+
+    @property
+    @deprecated(
+        "`disable_log_requests` is deprecated and has been replaced with "
+        "`enable_log_requests`. This will be removed in v0.12.0. Please use "
+        "`enable_log_requests` instead.")
+    def disable_log_requests(self) -> bool:
+        return not self.enable_log_requests
+
+    @disable_log_requests.setter
+    @deprecated(
+        "`disable_log_requests` is deprecated and has been replaced with "
+        "`enable_log_requests`. This will be removed in v0.12.0. Please use "
+        "`enable_log_requests` instead.")
+    def disable_log_requests(self, value: bool):
+        self.enable_log_requests = not value
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser,
@@ -1715,9 +1731,15 @@ class AsyncEngineArgs(EngineArgs):
         load_general_plugins()
         if not async_args_only:
             parser = EngineArgs.add_cli_args(parser)
+        parser.add_argument('--enable-log-requests',
+                            action=argparse.BooleanOptionalAction,
+                            default=AsyncEngineArgs.enable_log_requests,
+                            help='Enable logging requests.')
         parser.add_argument('--disable-log-requests',
-                            action='store_true',
-                            help='Disable logging requests.')
+                            action=argparse.BooleanOptionalAction,
+                            default=not AsyncEngineArgs.enable_log_requests,
+                            help='[DEPRECATED] Disable logging requests.',
+                            deprecated=True)
         current_platform.pre_register_and_update(parser)
         return parser
 
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 06bb4eeab69eb..1f962b008ee03 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -30,7 +30,7 @@ from vllm.sampling_params import SamplingParams
 from vllm.sequence import ExecuteModelRequest
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, weak_bind
+from vllm.utils import Device, deprecate_kwargs, weak_bind
 
 logger = init_logger(__name__)
 ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@@ -554,14 +554,20 @@ class AsyncLLMEngine(EngineClient):
         return LLMEngine._get_executor_cls(engine_config)
 
     @classmethod
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
     def from_vllm_config(
-        cls,
-        vllm_config: VllmConfig,
-        start_engine_loop: bool = True,
-        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
-        stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
-        disable_log_requests: bool = False,
-        disable_log_stats: bool = False,
+            cls,
+            vllm_config: VllmConfig,
+            start_engine_loop: bool = True,
+            usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
+            stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
+            enable_log_requests: bool = False,
+            disable_log_stats: bool = False,
+            disable_log_requests: bool = True,  # Deprecated, will be removed
     ) -> "AsyncLLMEngine":
         """Create an AsyncLLMEngine from the EngineArgs."""
 
@@ -569,7 +575,7 @@ class AsyncLLMEngine(EngineClient):
             vllm_config=vllm_config,
             executor_class=cls._get_executor_cls(vllm_config),
             start_engine_loop=start_engine_loop,
-            log_requests=not disable_log_requests,
+            log_requests=enable_log_requests,
             log_stats=not disable_log_stats,
             usage_context=usage_context,
             stat_loggers=stat_loggers,
@@ -598,7 +604,7 @@ class AsyncLLMEngine(EngineClient):
             usage_context=usage_context,
             stat_loggers=stat_loggers,
             disable_log_stats=engine_args.disable_log_stats,
-            disable_log_requests=engine_args.disable_log_requests,
+            enable_log_requests=engine_args.enable_log_requests,
         )
 
     @property
diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py
index fe6eb0d8c2f1a..903f3fd71ebcd 100644
--- a/vllm/engine/multiprocessing/engine.py
+++ b/vllm/engine/multiprocessing/engine.py
@@ -34,6 +34,7 @@ from vllm.outputs import RequestOutput
 from vllm.transformers_utils.config import (
     maybe_register_config_serialize_by_value)
 from vllm.usage.usage_lib import UsageContext
+from vllm.utils import deprecate_kwargs
 from vllm.worker.model_runner_base import InputProcessingError
 
 logger = init_logger(__name__)
@@ -120,10 +121,20 @@ class MQLLMEngine:
             return ENGINE_DEAD_ERROR()
 
     @classmethod
-    def from_vllm_config(cls, vllm_config: VllmConfig,
-                         usage_context: UsageContext,
-                         disable_log_requests: bool, disable_log_stats: bool,
-                         ipc_path: str) -> "MQLLMEngine":
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
+    def from_vllm_config(
+            cls,
+            vllm_config: VllmConfig,
+            usage_context: UsageContext,
+            enable_log_requests: bool,
+            disable_log_stats: bool,
+            ipc_path: str,
+            disable_log_requests: bool = True,  # Deprecated, will be removed
+    ) -> "MQLLMEngine":
         # Setup plugins for each process
         from vllm.plugins import load_general_plugins
         load_general_plugins()
@@ -136,7 +147,7 @@ class MQLLMEngine:
             ipc_path=ipc_path,
             usage_context=usage_context,
             use_async_sockets=use_async_sockets,
-            log_requests=(not disable_log_requests),
+            log_requests=enable_log_requests,
             log_stats=(not disable_log_stats),
         )
 
@@ -150,7 +161,7 @@ class MQLLMEngine:
             ipc_path=ipc_path,
             vllm_config=vllm_config,
             usage_context=usage_context,
-            disable_log_requests=engine_args.disable_log_requests,
+            enable_log_requests=engine_args.enable_log_requests,
             disable_log_stats=engine_args.disable_log_stats,
         )
 
@@ -436,7 +447,7 @@ def signal_handler(*_) -> None:
 
 def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
                   ipc_path: str, disable_log_stats: bool,
-                  disable_log_requests: bool, engine_alive):
+                  enable_log_requests: bool, engine_alive):
     try:
         # Ensure we can serialize transformer config before spawning
         maybe_register_config_serialize_by_value()
@@ -445,7 +456,7 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
             vllm_config=vllm_config,
             usage_context=usage_context,
             disable_log_stats=disable_log_stats,
-            disable_log_requests=disable_log_requests,
+            enable_log_requests=enable_log_requests,
             ipc_path=ipc_path)
 
         signal.signal(signal.SIGTERM, signal_handler)
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 1be03c57a1f1b..b8ec5461f7719 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -205,7 +205,7 @@ async def build_async_engine_client_from_engine_args(
             async_llm = AsyncLLM.from_vllm_config(
                 vllm_config=vllm_config,
                 usage_context=usage_context,
-                disable_log_requests=engine_args.disable_log_requests,
+                enable_log_requests=engine_args.enable_log_requests,
                 disable_log_stats=engine_args.disable_log_stats,
                 client_addresses=client_config,
                 client_index=client_index)
@@ -227,7 +227,7 @@ async def build_async_engine_client_from_engine_args(
             engine_client = AsyncLLMEngine.from_vllm_config(
                 vllm_config=vllm_config,
                 usage_context=usage_context,
-                disable_log_requests=engine_args.disable_log_requests,
+                enable_log_requests=engine_args.enable_log_requests,
                 disable_log_stats=engine_args.disable_log_stats)
             yield engine_client
         finally:
@@ -272,7 +272,7 @@ async def build_async_engine_client_from_engine_args(
             target=run_mp_engine,
             args=(vllm_config, UsageContext.OPENAI_API_SERVER, ipc_path,
                   engine_args.disable_log_stats,
-                  engine_args.disable_log_requests, engine_alive))
+                  engine_args.enable_log_requests, engine_alive))
         engine_process.start()
         engine_pid = engine_process.pid
         assert engine_pid is not None, "Engine process failed to start."
@@ -1570,10 +1570,10 @@ async def init_app_state(
     else:
         served_model_names = [args.model]
 
-    if args.disable_log_requests:
-        request_logger = None
-    else:
+    if args.enable_log_requests:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
+    else:
+        request_logger = None
 
     base_model_paths = [
         BaseModelPath(name=name, model_path=args.model)
diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py
index 137b368dad202..d146ad485d194 100644
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -324,10 +324,10 @@ async def run_batch(
     else:
         served_model_names = [args.model]
 
-    if args.disable_log_requests:
-        request_logger = None
-    else:
+    if args.enable_log_requests:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
+    else:
+        request_logger = None
 
     base_model_paths = [
         BaseModelPath(name=name, model_path=args.model)
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index d5d8d9dad73a8..7405f3986df8d 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -1668,6 +1668,12 @@ class FlexibleArgumentParser(ArgumentParser):
         # Enable the deprecated kwarg for Python 3.12 and below
 
         def parse_known_args(self, args=None, namespace=None):
+            if args is not None and "--disable-log-requests" in args:
+                # Special case: the deprecation warning below won't trigger for
+                # --disable-log-requests because its value equals the default.
+                logger.warning_once(
+                    "argument '--disable-log-requests' is deprecated. This "
+                    "will be removed in v0.12.0.")
             namespace, args = super().parse_known_args(args, namespace)
             for action in FlexibleArgumentParser._deprecated:
                 if (hasattr(namespace, dest := action.dest)
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index ed0d9620f4762..308ca32105ba9 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -27,7 +27,7 @@ from vllm.transformers_utils.config import (
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import Device, cdiv
+from vllm.utils import Device, cdiv, deprecate_kwargs
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core_client import EngineCoreClient
 from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
@@ -142,16 +142,22 @@ class AsyncLLM(EngineClient):
             pass
 
     @classmethod
+    @deprecate_kwargs(
+        "disable_log_requests",
+        additional_message=("This argument will have no effect. "
+                            "Use `enable_log_requests` instead."),
+    )
     def from_vllm_config(
-        cls,
-        vllm_config: VllmConfig,
-        start_engine_loop: bool = True,
-        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
-        stat_loggers: Optional[list[StatLoggerFactory]] = None,
-        disable_log_requests: bool = False,
-        disable_log_stats: bool = False,
-        client_addresses: Optional[dict[str, str]] = None,
-        client_index: int = 0,
+            cls,
+            vllm_config: VllmConfig,
+            start_engine_loop: bool = True,
+            usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
+            stat_loggers: Optional[list[StatLoggerFactory]] = None,
+            enable_log_requests: bool = False,
+            disable_log_stats: bool = False,
+            client_addresses: Optional[dict[str, str]] = None,
+            client_index: int = 0,
+            disable_log_requests: bool = True,  # Deprecated, will be removed
     ) -> "AsyncLLM":
         if not envs.VLLM_USE_V1:
             raise ValueError(
@@ -166,7 +172,7 @@ class AsyncLLM(EngineClient):
             executor_class=Executor.get_class(vllm_config),
             start_engine_loop=start_engine_loop,
             stat_loggers=stat_loggers,
-            log_requests=not disable_log_requests,
+            log_requests=enable_log_requests,
             log_stats=not disable_log_stats,
             usage_context=usage_context,
             client_addresses=client_addresses,
@@ -191,7 +197,7 @@ class AsyncLLM(EngineClient):
         return cls(
             vllm_config=vllm_config,
             executor_class=executor_class,
-            log_requests=not engine_args.disable_log_requests,
+            log_requests=engine_args.enable_log_requests,
             log_stats=not engine_args.disable_log_stats,
             start_engine_loop=start_engine_loop,
             usage_context=usage_context,

From 326a1b001db10afc2dc5b2bfcb60a3b8f8bcb2ac Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 1 Aug 2025 17:32:27 +0100
Subject: [PATCH 132/224] Improve documentation of
 `ModelConfig.try_get_generation_config` to prevent future confusion (#21526)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/config.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 2d61552c5dadc..124d62b699771 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1575,7 +1575,18 @@ class ModelConfig:
         return self.multimodal_config
 
     def try_get_generation_config(self) -> dict[str, Any]:
-        if self.generation_config in ("auto", "vllm"):
+        """
+        This method attempts to retrieve the non-default values of the
+        generation config for this model.
+        
+        The generation config can contain information about special tokens, as
+        well as sampling parameters, which is why this method exists separately
+        from `get_diff_sampling_param`.
+
+        Returns:
+            A dictionary containing the non-default generation config.
+        """
+        if self.generation_config in {"auto", "vllm"}:
             config = try_get_generation_config(
                 self.hf_config_path or self.model,
                 trust_remote_code=self.trust_remote_code,
@@ -1594,13 +1605,18 @@ class ModelConfig:
 
     def get_diff_sampling_param(self) -> dict[str, Any]:
         """
-        This method returns a dictionary containing the parameters
-        that differ from the default sampling parameters. If
-        `generation_config` is `"vllm"`, an empty dictionary is returned.
+        This method returns a dictionary containing the non-default sampling
+        parameters with `override_generation_config` applied.
+
+        The default sampling parameters are:
+
+        - vLLM's neutral defaults if `self.generation_config="vllm"`
+        - the model's defaults if `self.generation_config="auto"`
+        - as defined in `generation_config.json` if
+            `self.generation_config="path/to/generation_config/dir"`
 
         Returns:
-            dict[str, Any]: A dictionary with the differing sampling
-            parameters, if `generation_config` is `"vllm"` an empty dictionary.
+            A dictionary containing the non-default sampling parameters.
         """
         if self.generation_config == "vllm":
             config = {}

From 3f8e9521791dd3f41c90cc2b3c9e78a1951f5237 Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Sat, 2 Aug 2025 00:33:30 +0800
Subject: [PATCH 133/224] [Bugfix] Fix glm4.1v video inference issue (#22067)

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 .../multimodal/processing/test_glm4_1v.py     | 51 +++++++++++++++++++
 vllm/model_executor/models/glm4_1v.py         |  8 +--
 2 files changed, 53 insertions(+), 6 deletions(-)
 create mode 100644 tests/models/multimodal/processing/test_glm4_1v.py

diff --git a/tests/models/multimodal/processing/test_glm4_1v.py b/tests/models/multimodal/processing/test_glm4_1v.py
new file mode 100644
index 0000000000000..d1c5fa8fec6d2
--- /dev/null
+++ b/tests/models/multimodal/processing/test_glm4_1v.py
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.assets.video import VideoAsset
+from vllm.multimodal import MULTIMODAL_REGISTRY
+
+from ...utils import build_model_context
+
+
+@pytest.mark.parametrize("model_id", ["THUDM/GLM-4.1V-9B-Thinking"])
+@pytest.mark.parametrize("expected_toks_per_frame", [299])
+@pytest.mark.parametrize("num_frames", [32, 128])
+@pytest.mark.parametrize("fps, expected_grid_t", [(1, 5), (2, 10)])
+def test_processor_override(
+    model_id: str,
+    expected_toks_per_frame: int,
+    expected_grid_t: int,
+    fps: int,
+    num_frames: int,
+):
+    """Ensure GLM4vMultiModalProcessor can handle video frames properly."""
+    ctx = build_model_context(
+        model_id,
+        mm_processor_kwargs=None,
+        limit_mm_per_prompt={"video": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+    tokenizer = processor.info.get_tokenizer()
+    hf_processor_mm_kwargs = {"fps": fps}
+
+    # Build the video prompt and load the requested number of video frames
+    video_assets = VideoAsset(name="baby_reading", num_frames=num_frames)
+    prompt = "<|begin_of_video|><|video|><|end_of_video|>"
+
+    video, metadata = video_assets.np_ndarrays, video_assets.metadata
+    metadata["fps"] = fps
+    mm_data = {"video": [(video, metadata)]}
+
+    processed_inputs = processor.apply(prompt, mm_data, hf_processor_mm_kwargs)
+
+    # Ensure the prompt contains the expected number of video placeholders
+    hf_processor = processor.info.get_hf_processor(**hf_processor_mm_kwargs)
+    video_token_id = tokenizer.convert_tokens_to_ids(hf_processor.video_token)
+    video_tok_count = processed_inputs["prompt_token_ids"].count(
+        video_token_id)
+    grid_t, _, _ = processed_inputs["mm_kwargs"]["video_grid_thw"][0]
+
+    assert grid_t == expected_grid_t
+    assert video_tok_count == expected_toks_per_frame * grid_t
diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 5f306f05d140e..7c9840790fe3e 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -937,7 +937,7 @@ class Glm4vProcessingInfo(BaseProcessingInfo):
                               total_frames: int) -> list[int]:
         video_processor = self.get_video_processor()
 
-        video_fps = metadata.get("fps", 2.0)
+        video_fps = metadata.get("fps", video_processor.fps)
         meta_frames = metadata.get("total_num_frames", total_frames)
         max_frame_idx = meta_frames - 1
         duration = metadata.get("duration",
@@ -1120,11 +1120,7 @@ class Glm4vMultiModalProcessor(BaseMultiModalProcessor[Glm4vProcessingInfo]):
                     video_placeholder,
                 )
 
-                grid_t = len(video_outputs["video_grid_thw"])
-                _, grid_h, grid_w = video_outputs["video_grid_thw"][0]
-                grid_thw = torch.tensor([[grid_t, grid_h, grid_w]])
-
-                video_grid_thw_lst.append(grid_thw)
+                video_grid_thw_lst.append(video_outputs["video_grid_thw"])
                 pixel_values_videos_lst.append(
                     video_outputs["pixel_values_videos"])
             video_outputs = dict(

From b879ecd6e2636b6af893052615693a51466381ec Mon Sep 17 00:00:00 2001
From: "rongfu.leng" <rongfu.leng@daocloud.io>
Date: Sat, 2 Aug 2025 01:09:36 +0800
Subject: [PATCH 134/224] [Bugfix] fix when skip tokenizer init (#21922)

Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
---
 tests/v1/engine/test_llm_engine.py | 26 ++++++++++++++++++++++++++
 vllm/v1/engine/processor.py        |  9 +++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/tests/v1/engine/test_llm_engine.py b/tests/v1/engine/test_llm_engine.py
index f37686317fd14..2848420c22085 100644
--- a/tests/v1/engine/test_llm_engine.py
+++ b/tests/v1/engine/test_llm_engine.py
@@ -213,3 +213,29 @@ def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
         assert len(num_accepted_tokens_per_pos) == 1
         assert isinstance(num_accepted_tokens_per_pos[0], Vector)
         assert len(num_accepted_tokens_per_pos[0].values) == 5
+
+
+@pytest.mark.parametrize("model", ["meta-llama/Llama-3.2-1B-Instruct"])
+def test_skip_tokenizer_initialization(model: str,
+                                       monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setenv("VLLM_USE_V1", "1")
+    # This test checks if the flag skip_tokenizer_init skips the initialization
+    # of tokenizer and detokenizer. The generated output is expected to contain
+    # token ids.
+    llm = LLM(
+        model=model,
+        skip_tokenizer_init=True,
+        enforce_eager=True,
+    )
+    sampling_params = SamplingParams(prompt_logprobs=True, detokenize=True)
+
+    with pytest.raises(ValueError, match="cannot pass text prompts when"):
+        llm.generate("abc", sampling_params)
+
+    outputs = llm.generate({"prompt_token_ids": [1, 2, 3]},
+                           sampling_params=sampling_params)
+    assert len(outputs) > 0
+    completions = outputs[0].outputs
+    assert len(completions) > 0
+    assert completions[0].text == ""
+    assert completions[0].token_ids
diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
index 0f2f404a130ef..224acc47feb27 100644
--- a/vllm/v1/engine/processor.py
+++ b/vllm/v1/engine/processor.py
@@ -89,6 +89,10 @@ class Processor:
             return
         if not params.allowed_token_ids:
             raise ValueError("allowed_token_ids is not None and empty!")
+        if self.tokenizer is None:
+            # When skip_tokenizer_init=True, we can't validate token IDs
+            # Skip validation and let the model handle invalid tokens
+            return
         tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
         vocab_size = len(tokenizer)
         if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids):
@@ -283,8 +287,9 @@ class Processor:
                     len(decoder_inputs["prompt_token_ids"]))
             sampling_params.update_from_generation_config(
                 self.generation_config_fields, eos_token_id)
-            sampling_params.update_from_tokenizer(
-                self.tokenizer.get_lora_tokenizer(lora_request))
+            if self.tokenizer is not None:
+                sampling_params.update_from_tokenizer(
+                    self.tokenizer.get_lora_tokenizer(lora_request))
         else:
             pooling_params = params.clone()
 

From d6664664b442cb236f8541a126e4076a5e12c56d Mon Sep 17 00:00:00 2001
From: Huzaifa Sidhpurwala <huzaifas@redhat.com>
Date: Fri, 1 Aug 2025 21:09:49 +0400
Subject: [PATCH 135/224] security policy: take 1 (#21119)

Signed-off-by: Huzaifa Sidhpurwala <huzaifas@redhat.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com>
---
 SECURITY.md | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 6053cfb41f35b..4f338557da1a0 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,13 +1,41 @@
 # Security Policy
 
-## Reporting a Vulnerability
+## Reporting security issues:
 
-If you believe you have found a security vulnerability in vLLM, we encourage you to let us know right away. We will investigate all legitimate reports and do our best to quickly fix the problem.
+Please report security issues privately using [the vulnerability submission form](https://github.com/vllm-project/vllm/security/advisories/new).
 
-Please report security issues privately using [the vulnerability submission form](https://github.com/vllm-project/vllm/security/advisories/new). Reports will then be triaged by the [vulnerability management team](https://docs.vllm.ai/en/latest/contributing/vulnerability_management.html).
+## Issue triage
 
----
+Reports will then be triaged by the [vulnerability management team](https://docs.vllm.ai/en/latest/contributing/vulnerability_management.html).
+
+## Threat model
 
 Please see the [Security Guide in the vLLM documentation](https://docs.vllm.ai/en/latest/usage/security.html) for more information on vLLM's security assumptions and recommendations.
 
 Please see [PyTorch's Security Policy](https://github.com/pytorch/pytorch/blob/main/SECURITY.md) for more information and recommendations on how to securely interact with models.
+
+## Issue severity
+
+We will determine the risk of each issue, taking into account our experience dealing with past issues, versions affected, common defaults, and use cases. We use the following severity categories:
+
+### CRITICAL Severity
+Vulnerabilities that allow remote attackers to execute arbitrary code, take full control of the system, or significantly compromise confidentiality, integrity, or availability without any interaction or privileges needed, examples include remote code execution via network, deserialization issues that allow exploit chains. Generally those issues which are rated as CVSS  ≥ 9.0.
+
+### HIGH Severity
+Serious security flaws that allow elevated impact—like RCE in specific, limited contexts or significant data loss—but require advanced conditions or some trust, examples include RCE in advanced deployment modes (e.g. multi-node), or high impact issues where some sort of privileged network access is required. These issues typically have CVSS scores between 7.0 and 8.9
+
+### MODERATE Severity
+Vulnerabilities that cause denial of service or partial disruption, but do not allow arbitrary code execution or data breach and have limited impact. These issues have a CVSS rating between 4.0 and 6.9
+
+### LOW Severity
+Minor issues such as informational disclosures, logging errors, non-exploitable flaws, or weaknesses that require local or high-privilege access and offer negligible impact. Examples include side channel attacks or hash collisions. These issues often have CVSS scores less than 4.0
+
+## Prenotification policy
+
+For certain security issues of CRITICAL, HIGH, or MODERATE severity level, we may prenotify certain organizations or vendors that ship vLLM. The purpose of this prenotification is to allow for a coordinated release of fixes for severe issues.
+
+* This prenotification will be in the form of a private email notification. It may also include adding security contacts to the GitHub security advisory, typically a few days before release.
+
+* If you wish to be added to the prenotification group, please send an email copying all the members of the [vulnerability management team](https://docs.vllm.ai/en/latest/contributing/vulnerability_management.html). Each vendor contact will be analyzed on a case-by-case basis.
+
+* We may withdraw organizations from receiving future prenotifications if they release fixes or any other information about issues before they are public. Group membership may also change based on policy refinements for who may be included.

From ac45c44d98e77f30e47b8fb69134f4635183070d Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath <varunsundar08@gmail.com>
Date: Fri, 1 Aug 2025 22:44:38 +0530
Subject: [PATCH 136/224] [Bugfix] [Performance] DeepEPHighThroughput +
 DeepSeek : Quant before Dispatch (#21837)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
---
 .../layers/fused_moe/deepep_ht_prepare_finalize.py  | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py
index 7016ff34c3a85..f6b62254e7b4c 100644
--- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py
@@ -144,12 +144,13 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
                 "apply_router_weight_on_input is only implemented for topk=1")
             a1 = a1 * topk_weights.to(a1.dtype)
 
-        if quant_config.per_act_token_quant:
+        if quant_config.is_block_quantized:
+            # Quant and Dispatch
             a1q, a1q_scale = moe_kernel_quantize_input(
                 a1,
                 a1_scale,
                 quant_dtype=quant_config.quant_dtype,
-                per_act_token_quant=True,
+                per_act_token_quant=quant_config.per_act_token_quant,
                 block_shape=quant_config.block_shape,
             )
             if a1q_scale is not None and a1q_scale.numel() == 1:
@@ -162,8 +163,10 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
                  rank_topk_weights=topk_weights,
                  num_experts=num_experts)
         else:
-            # DeepEP kernels only support dispatching per-token-quant
-            # quantization. dispatch in bfloat16.
+            # Dispatch and Quant
+            # DeepEP kernels only support dispatching block-quantized
+            # activation scales.
+            # Dispatch in bfloat16
             (expert_x, _, expert_tokens_meta, expert_topk_ids,
              expert_topk_weights) = self._do_dispatch(
                  tokens=a1,
@@ -171,7 +174,7 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
                  rank_topk_ids=topk_ids,
                  rank_topk_weights=topk_weights,
                  num_experts=num_experts)
-            # quantize now
+            # Quantize after dispatch.
             expert_x_scale = None
             if expert_x.numel() != 0:
                 expert_x, expert_x_scale = moe_kernel_quantize_input(

From 38c8bce8b652df87d111c04ddf849c38615000c7 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 1 Aug 2025 18:31:29 +0100
Subject: [PATCH 137/224] Enable headless models for pooling in the
 Transformers backend (#21767)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 tests/models/registry.py                   |  1 +
 tests/models/test_transformers.py          | 28 +++++++++++++++++-----
 vllm/config.py                             |  9 +++++--
 vllm/model_executor/models/registry.py     |  3 ++-
 vllm/model_executor/models/transformers.py | 12 ++++++++++
 5 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/tests/models/registry.py b/tests/models/registry.py
index 806342a57dfab..fdc7888c85efb 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -525,6 +525,7 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
 }
 
 _TRANSFORMERS_BACKEND_MODELS = {
+    "TransformersModel": _HfExamplesInfo("Qwen/Qwen3-Embedding-0.6B"),
     "TransformersForCausalLM": _HfExamplesInfo("hmellor/Ilama-3.2-1B", trust_remote_code=True),  # noqa: E501
     "TransformersForMultimodalLM": _HfExamplesInfo("OpenGVLab/InternVL3-1B-hf"),
 }
diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py
index 5b7d90dfb896d..66ff8f7a54d31 100644
--- a/tests/models/test_transformers.py
+++ b/tests/models/test_transformers.py
@@ -34,8 +34,7 @@ def check_implementation(
 
     with runner_test(model, **kwargs_test, **kwargs) as model_test:
         model_config = model_test.llm.llm_engine.model_config
-        assert model_config.architecture == (
-            model_config._get_transformers_backend_cls())
+        assert model_config.using_transformers_backend()
 
         outputs_test = model_test.generate_greedy_logprobs(*args)
 
@@ -135,8 +134,7 @@ def test_quantization(
             enforce_eager=True,
             **quantization_kwargs) as vllm_model:  # type: ignore[arg-type]
         model_config = vllm_model.llm.llm_engine.model_config
-        assert model_config.architecture == (
-            model_config._get_transformers_backend_cls())
+        assert model_config.using_transformers_backend()
 
         transformers_outputs = vllm_model.generate_greedy_logprobs(
             example_prompts, max_tokens=max_tokens, num_logprobs=num_logprobs)
@@ -149,6 +147,25 @@ def test_quantization(
     )
 
 
+@pytest.mark.parametrize(
+    "model",
+    [
+        # Layers live in `layers`
+        "Qwen/Qwen3-Embedding-0.6B",
+        # Layers live in `model.layers`
+        "meta-llama/Llama-3.2-1B-Instruct"
+    ],
+)
+def test_embed_loading(vllm_runner, model):
+    with vllm_runner(model,
+                     max_model_len=1024,
+                     enforce_eager=True,
+                     runner="pooling",
+                     model_impl="transformers") as model_test:
+        model_config = model_test.llm.llm_engine.model_config
+        assert model_config.using_transformers_backend()
+
+
 @pytest.mark.parametrize(
     "model",
     ["jason9693/Qwen2.5-1.5B-apeach"],
@@ -169,8 +186,7 @@ def test_classify(
                      dtype=dtype,
                      model_impl="transformers") as vllm_model:
         model_config = vllm_model.llm.llm_engine.model_config
-        assert model_config.architecture == (
-            model_config._get_transformers_backend_cls())
+        assert model_config.using_transformers_backend()
 
         vllm_outputs = vllm_model.classify(example_prompts)
 
diff --git a/vllm/config.py b/vllm/config.py
index 124d62b699771..dabb4b524dfd8 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -812,12 +812,17 @@ class ModelConfig:
     def _get_transformers_backend_cls(self) -> str:
         """Determine which Transformers backend class will be used if
         `model_impl` is set to `transformers` or `auto`."""
+        if getattr(self, "runner_type", self.runner) == "pooling":
+            return "TransformersModel"
         if self.hf_config != self.hf_text_config:
             # If 'hf_text_config' is the same as 'hf_config'. If not, it is
             # probably a composite config, i.e. multimodal
             return "TransformersForMultimodalLM"
-        else:
-            return "TransformersForCausalLM"
+        return "TransformersForCausalLM"
+
+    def using_transformers_backend(self) -> bool:
+        """Check if the model is using the Transformers backend class."""
+        return self.architecture == self._get_transformers_backend_cls()
 
     @property
     def registry(self):
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 848c04b9b32f7..0c5d87a7dc472 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -270,8 +270,9 @@ _TRANSFORMERS_SUPPORTED_MODELS = {
 }
 
 _TRANSFORMERS_BACKEND_MODELS = {
-    "TransformersForMultimodalLM": ("transformers", "TransformersForMultimodalLM"), # noqa: E501
+    "TransformersModel": ("transformers", "TransformersModel"),
     "TransformersForCausalLM": ("transformers", "TransformersForCausalLM"),
+    "TransformersForMultimodalLM": ("transformers", "TransformersForMultimodalLM"), # noqa: E501
 }
 # yapf: enable
 
diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py
index e67548800c354..5059d1e1d9fea 100644
--- a/vllm/model_executor/models/transformers.py
+++ b/vllm/model_executor/models/transformers.py
@@ -651,6 +651,18 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP):
         return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
 
 
+@support_torch_compile
+class TransformersModel(TransformersBase):
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            # Add `model.` prefix for base model checkpoints
+            "": "model.",
+            # Remove `model.` from places it should not be
+            "model.model.": "model.",
+            "model.score": "score",
+        })
+
+
 @support_torch_compile
 class TransformersForCausalLM(TransformersBase):
 

From 8d705996dffbb2299750b7b2b50bbcd5ccb4a5ad Mon Sep 17 00:00:00 2001
From: Jee Jee Li <pandaleefree@gmail.com>
Date: Sat, 2 Aug 2025 01:35:30 +0800
Subject: [PATCH 138/224] [Misc] Minor enhancement of benchmark_moe (#22068)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
---
 benchmarks/kernels/benchmark_moe.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py
index c350aaf5d3ad2..72250e2fb6d2b 100644
--- a/benchmarks/kernels/benchmark_moe.py
+++ b/benchmarks/kernels/benchmark_moe.py
@@ -22,6 +22,13 @@ from vllm.utils import FlexibleArgumentParser
 FP8_DTYPE = current_platform.fp8_dtype()
 
 
+def ensure_divisibility(numerator, denominator):
+    """Ensure that numerator is divisible by the denominator."""
+    assert numerator % denominator == 0, (
+        "intermediate_size {} is not divisible by tp {}.".format(numerator, denominator)
+    )
+
+
 class BenchmarkConfig(TypedDict):
     BLOCK_SIZE_M: int
     BLOCK_SIZE_N: int
@@ -603,7 +610,7 @@ def main(args: argparse.Namespace):
         topk = config.num_experts_per_tok
         intermediate_size = config.intermediate_size
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
-
+    ensure_divisibility(intermediate_size, args.tp_size)
     hidden_size = config.hidden_size
     dtype = torch.float16 if current_platform.is_rocm() else config.torch_dtype
     use_fp8_w8a8 = args.dtype == "fp8_w8a8"

From 3277e8f9e19c396d6dd92a0901d2e3f8fb8982d4 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Fri, 1 Aug 2025 13:36:07 -0400
Subject: [PATCH 139/224] Fix pre-commit failure for SECURTIY.md (#22102)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 SECURITY.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/SECURITY.md b/SECURITY.md
index 4f338557da1a0..414669fb3712e 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,6 +1,6 @@
 # Security Policy
 
-## Reporting security issues:
+## Reporting security issues
 
 Please report security issues privately using [the vulnerability submission form](https://github.com/vllm-project/vllm/security/advisories/new).
 
@@ -19,15 +19,19 @@ Please see [PyTorch's Security Policy](https://github.com/pytorch/pytorch/blob/m
 We will determine the risk of each issue, taking into account our experience dealing with past issues, versions affected, common defaults, and use cases. We use the following severity categories:
 
 ### CRITICAL Severity
+
 Vulnerabilities that allow remote attackers to execute arbitrary code, take full control of the system, or significantly compromise confidentiality, integrity, or availability without any interaction or privileges needed, examples include remote code execution via network, deserialization issues that allow exploit chains. Generally those issues which are rated as CVSS  ≥ 9.0.
 
 ### HIGH Severity
+
 Serious security flaws that allow elevated impact—like RCE in specific, limited contexts or significant data loss—but require advanced conditions or some trust, examples include RCE in advanced deployment modes (e.g. multi-node), or high impact issues where some sort of privileged network access is required. These issues typically have CVSS scores between 7.0 and 8.9
 
 ### MODERATE Severity
+
 Vulnerabilities that cause denial of service or partial disruption, but do not allow arbitrary code execution or data breach and have limited impact. These issues have a CVSS rating between 4.0 and 6.9
 
 ### LOW Severity
+
 Minor issues such as informational disclosures, logging errors, non-exploitable flaws, or weaknesses that require local or high-privilege access and offer negligible impact. Examples include side channel attacks or hash collisions. These issues often have CVSS scores less than 4.0
 
 ## Prenotification policy

From 9659bc7f271ec640da780b5ca739e261764b954b Mon Sep 17 00:00:00 2001
From: Animesh Jain <jainanimesh2305@yahoo.com>
Date: Fri, 1 Aug 2025 10:38:52 -0700
Subject: [PATCH 140/224] [compile][startup] Disable C++ compilation of
 symbolic shapes (#20836)

Signed-off-by: Animesh Jain <anijain@umich.edu>
---
 vllm/compilation/decorators.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
index 1370862d580a5..0d2c432497c40 100644
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -267,8 +267,15 @@ def _support_torch_compile(
                     code.co_filename)
                 return inline_call(parent, func, args, kwargs)
 
-            with patch.object(InliningInstructionTranslator, 'inline_call',
-                              patched_inline_call):
+            # Disable the C++ compilation of symbolic shape guards. C++-fication
+            # of symbolic shape guards can improve guard overhead. But, since
+            # vllm skips guards anyway, setting this flag to False can improve
+            # compile time.
+            with torch._dynamo.config.patch("enable_cpp_symbolic_shape_guards",
+                                            False), patch.object(
+                                                InliningInstructionTranslator,
+                                                'inline_call',
+                                                patched_inline_call):
                 output = self.compiled_callable(*args, **kwargs)
             return output
 

From d331759488eb7627d2454549eeb01d14f83f1c41 Mon Sep 17 00:00:00 2001
From: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
Date: Fri, 1 Aug 2025 11:50:58 -0700
Subject: [PATCH 141/224] Introduce RayPPCommunicator for ray-based PP (#21660)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
---
 .../device_communicators/ray_communicator.py  | 257 ++++++++++++++++++
 vllm/envs.py                                  |   8 +
 vllm/executor/ray_distributed_executor.py     |  15 +
 3 files changed, 280 insertions(+)
 create mode 100644 vllm/distributed/device_communicators/ray_communicator.py

diff --git a/vllm/distributed/device_communicators/ray_communicator.py b/vllm/distributed/device_communicators/ray_communicator.py
new file mode 100644
index 0000000000000..e5ba297ebcc1b
--- /dev/null
+++ b/vllm/distributed/device_communicators/ray_communicator.py
@@ -0,0 +1,257 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import uuid
+from typing import Any, Optional
+
+import ray
+import torch
+from ray.exceptions import RayChannelError
+from ray.experimental.channel.communicator import (Communicator,
+                                                   TorchTensorAllocator)
+from torch.distributed import ReduceOp
+
+from vllm.distributed.device_communicators.base_device_communicator import (
+    DeviceCommunicatorBase)
+from vllm.distributed.parallel_state import get_pp_group
+from vllm.logger import init_logger
+from vllm.utils import current_stream
+
+logger = init_logger(__name__)
+
+
+class RayPPCommunicator(Communicator):
+    """
+    Communicator to be used for pipeline parallelism in Ray Compiled Graph.
+    This wraps around the vLLM _PP GroupCoordinator.
+
+    This class is not thread-safe.
+    """
+
+    _comm: Optional[DeviceCommunicatorBase]
+
+    def __init__(
+        self,
+        world_size: int,
+        comm_id: Any,
+        rank: Optional[int],
+        actor_handles: list["ray.actor.ActorHandle"],
+        cuda_stream: Optional[torch.cuda.Stream],
+        use_communication_streams: bool = False,
+    ):
+        """
+        Initialize a RayPPCommunicator that can be used to communicate with
+        other Ray Compiled Graph actors for pipeline parallelism.
+
+        Args:
+            world_size: The number of participating actors.
+            comm_id: A unique communicator ID. This is just to conform with
+                the Ray Communicator API and is not used.
+            rank: The rank of this actor. If None, then the caller is not a
+                participant of the RayPPCommunicator group (e.g., the Ray
+                driver).
+            actor_handles: A list of actor handles.
+            cuda_stream: A CUDA stream to dispatch communication ops to. This
+                is not supported.
+            use_communication_streams: Whether to use communication streams.
+                This is not supported.
+        """
+        self._world_size = world_size
+        self._rank: Optional[int] = None
+        self._actor_handles = actor_handles
+        if use_communication_streams:
+            raise NotImplementedError(
+                "use_communication_streams is not supported")
+        if cuda_stream is not None and cuda_stream != current_stream():
+            raise ValueError(
+                "cuda_stream other than the current stream is not supported")
+
+        if rank is not None:
+            # Rank is not None, this is Ray worker
+            assert ray.get_gpu_ids(), "RayPPCommunicator has no GPUs assigned"
+
+            self._comm = get_pp_group().device_communicator
+
+            # Since we wrap around the vLLM _PP communicator, we use
+            # the rank from the vLLM communicator, and ignore the rank
+            # passed in from Ray.
+            # TODO(rui): refactor the Ray Communicator API so that
+            # it also supports no rank passed in.
+            self._rank = self._comm.rank_in_group
+
+            self._build_actor_rank_mapping()
+        else:
+            # Rank is None, this is Ray driver
+            self._comm = None
+
+        self._closed = False
+
+    def _build_actor_rank_mapping(self):
+        """
+        Use collective communication to build a mapping from actor IDs to ranks.
+        This should be called once during initialization.
+        """
+        if self._comm is None:
+            return {}
+
+        current_actor = ray.get_runtime_context().current_actor
+        actor_id_str = current_actor._actor_id.hex()
+
+        # Ray actor IDs are 32-character hex strings (128 bits)
+        ACTOR_ID_LEN = 32
+        actor_id_bytes = actor_id_str.encode('utf-8')
+        assert len(
+            actor_id_bytes
+        ) == ACTOR_ID_LEN, f"Unexpected actor ID length: {len(actor_id_bytes)}"
+
+        actor_id_tensor = torch.frombuffer(
+            actor_id_bytes, dtype=torch.uint8).to(self._comm.device)
+
+        # All-gather full actor IDs from all actors
+        gathered_ids = self._comm.all_gather(actor_id_tensor, dim=0)
+
+        # Build mapping: actor_id -> device_comm_rank
+        self._actor_id_to_rank = {}
+        for rank in range(self._world_size):
+            start_idx = rank * ACTOR_ID_LEN
+            end_idx = (rank + 1) * ACTOR_ID_LEN
+            actor_bytes = gathered_ids[start_idx:end_idx].cpu().numpy(
+            ).tobytes()
+            actor_id = actor_bytes.decode('utf-8')
+            self._actor_id_to_rank[actor_id] = rank
+
+    def initialize(self, rank: int) -> None:
+        # No additional initialization is needed.
+        pass
+
+    def get_actor_handles(self) -> list["ray.actor.ActorHandle"]:
+        return self._actor_handles
+
+    def get_rank(self, actor: ray.actor.ActorHandle) -> int:
+        """
+        Return the given actor's rank using device communicator collective ops.
+        """
+        assert hasattr(self, '_actor_id_to_rank'), (
+            "Actor rank mapping not built. "
+            "This should have been done during initialization.")
+
+        actor_id_str = actor._actor_id.hex()
+
+        if actor_id_str in self._actor_id_to_rank:
+            return self._actor_id_to_rank[actor_id_str]  # type: ignore
+        else:
+            raise ValueError(f"Actor {actor} not found in communicator group")
+
+    def get_self_rank(self) -> Optional[int]:
+        """
+        Return this actor's rank.
+        """
+        return self._rank
+
+    def get_world_size(self) -> int:
+        """
+        Return the number of ranks in the RayPPCommunicator group.
+        """
+        return self._world_size
+
+    def send(self, buf: "torch.Tensor", peer_rank: int) -> None:
+        """
+        Send a torch.Tensor to a peer.
+
+        This returns when the send kernel has been queued, but the kernel may
+        not have completed. Therefore, the caller should ensure that there are
+        no concurrent writes to the sent `buf` until the send has finished.
+        That is, either all writes should be submitted on the current stream
+        or, if on a different stream, that stream should synchronize with
+        the current stream.
+
+        Args:
+            buf: The torch.Tensor to send. It should already be on this
+                actor's default device.
+            peer_rank: The rank of the actor to send to.
+        """
+        if self._closed:
+            raise RayChannelError("RayPPCommunicator has been destroyed.")
+
+        assert self._comm is not None
+        self._comm.send(buf, peer_rank)
+
+    def recv(
+        self,
+        shape: tuple[int],
+        dtype: "torch.dtype",
+        peer_rank: int,
+        allocator: TorchTensorAllocator,
+    ) -> "torch.Tensor":
+        """
+        Receive a torch.Tensor from a peer and synchronize the current stream.
+
+        After this call returns, the receive buffer is safe to read from
+        any stream. A RayChannelError will be raised if an error occurred
+        (e.g., remote actor died), and the buffer is not safe to read.
+
+        Args:
+            shape: The shape of the tensor to receive.
+            dtype: The dtype of the tensor to receive.
+            peer_rank: The rank of the actor to receive from.
+            allocator: The allocator to use to create the received tensor.
+                This is ignored for this implementation.
+        """
+        if self._closed:
+            raise RayChannelError("RayPPCommunicator has been destroyed.")
+
+        assert self._comm is not None
+        size = torch.Size(shape)
+        buf = self._comm.recv(size, dtype, src=peer_rank)
+
+        # Buffer values are undefined if NCCL ops are aborted. Therefore, we
+        # need to synchronize here and check that the channel is still
+        # open to ensure that the receive buffer is valid.
+        # TODO(swang): Avoid CUDA synchronization.
+        current_stream().synchronize()
+
+        if self._closed:
+            raise RayChannelError("RayPPCommunicator has been destroyed.")
+        return buf
+
+    def allgather(
+        self,
+        send_buf: "torch.Tensor",
+        recv_buf: "torch.Tensor",
+    ):
+        raise NotImplementedError("allgather is not supported")
+
+    def allreduce(
+        self,
+        send_buf: "torch.Tensor",
+        recv_buf: "torch.Tensor",
+        op: ReduceOp = ReduceOp.SUM,
+    ):
+        raise NotImplementedError("allreduce is not supported")
+
+    def reducescatter(
+        self,
+        send_buf: "torch.Tensor",
+        recv_buf: "torch.Tensor",
+        op: ReduceOp = ReduceOp.SUM,
+    ):
+        raise NotImplementedError("reducescatter is not supported")
+
+    @property
+    def recv_stream(self):
+        return torch.cuda.StreamContext(current_stream())
+
+    @property
+    def send_stream(self):
+        return torch.cuda.StreamContext(current_stream())
+
+    def destroy(self) -> None:
+        # Just sets a flag, vLLM manages the lifecycle of the underlying
+        # _PP GroupCoordinator.
+        self._closed = True
+
+    def get_transport_name(self) -> str:
+        return "nccl"
+
+    @classmethod
+    def generate_communicator_id(cls) -> Any:
+        return uuid.uuid4()
diff --git a/vllm/envs.py b/vllm/envs.py
index 7553eccf16ea9..2fda2903179b5 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -55,6 +55,7 @@ if TYPE_CHECKING:
     VLLM_USE_RAY_COMPILED_DAG: bool = False
     VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: str = "auto"
     VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = False
+    VLLM_USE_RAY_WRAPPED_PP_COMM: bool = True
     VLLM_XLA_USE_SPMD: bool = False
     VLLM_WORKER_MULTIPROC_METHOD: str = "fork"
     VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets")
@@ -498,6 +499,13 @@ environment_variables: dict[str, Callable[[], Any]] = {
     lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "0"))
                  ),
 
+    # If the env var is set, it uses a Ray Communicator wrapping
+    # vLLM's pipeline parallelism communicator to interact with Ray's
+    # Compiled Graph. Otherwise, it uses Ray's NCCL communicator.
+    # This flag is ignored if VLLM_USE_RAY_COMPILED_DAG is not set.
+    "VLLM_USE_RAY_WRAPPED_PP_COMM":
+    lambda: bool(int(os.getenv("VLLM_USE_RAY_WRAPPED_PP_COMM", "1"))),
+
     # Use dedicated multiprocess context for workers.
     # Both spawn and fork work
     "VLLM_WORKER_MULTIPROC_METHOD":
diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py
index e9ad62aeb99a8..37c3fe59c65dd 100644
--- a/vllm/executor/ray_distributed_executor.py
+++ b/vllm/executor/ray_distributed_executor.py
@@ -608,6 +608,21 @@ class RayDistributedExecutor(DistributedExecutorBase):
 
             forward_dag = MultiOutputNode(outputs)
 
+        if envs.VLLM_USE_RAY_WRAPPED_PP_COMM:
+            from ray.experimental.channel.accelerator_context import (
+                register_accelerator_context)
+
+            from vllm.distributed.device_communicators.ray_communicator import (
+                RayPPCommunicator)
+            register_accelerator_context(torch_module_name="cuda",
+                                         communicator_cls=RayPPCommunicator)
+            logger.info("Using RayPPCommunicator "
+                        "(which wraps vLLM _PP GroupCoordinator) "
+                        "for Ray Compiled Graph communication.")
+        else:
+            logger.info("Using Ray's NCCL communicator for "
+                        "Ray Compiled Graph communication.")
+
         return forward_dag.experimental_compile(
             enable_asyncio=enable_asyncio,
             _overlap_gpu_communication=envs.

From d84b97a3e33ed79aaba7552bfe5889d363875562 Mon Sep 17 00:00:00 2001
From: XiongfeiWei <isaacwxf23@gmail.com>
Date: Fri, 1 Aug 2025 11:56:08 -0700
Subject: [PATCH 142/224] Add lora test for tp>1 case for TPU. (#21970)

Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com>
---
 tests/tpu/lora/test_lora.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/tests/tpu/lora/test_lora.py b/tests/tpu/lora/test_lora.py
index b26bdd34d890e..4c47b8c43caff 100644
--- a/tests/tpu/lora/test_lora.py
+++ b/tests/tpu/lora/test_lora.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import pytest
+from torch_xla._internal import tpu
 
 import vllm
 from vllm.lora.request import LoRARequest
@@ -27,25 +28,31 @@ def use_v1_only(monkeypatch: pytest.MonkeyPatch):
         yield
 
 
-def setup_vllm(num_loras: int) -> vllm.LLM:
+def setup_vllm(num_loras: int, tp: int) -> vllm.LLM:
     return vllm.LLM(model="Qwen/Qwen2.5-3B-Instruct",
                     num_scheduler_steps=1,
                     max_model_len=256,
                     max_seq_len_to_capture=256,
                     max_num_seqs=8,
+                    tensor_parallel_size=tp,
                     enable_lora=True,
                     max_loras=num_loras,
                     max_lora_rank=8)
 
 
-def test_single_lora():
+TPU_TENSOR_PARALLEL_SIZES = [1, tpu.num_available_chips()
+                             ] if tpu.num_available_chips() > 1 else [1]
+
+
+@pytest.mark.parametrize("tp", TPU_TENSOR_PARALLEL_SIZES)
+def test_single_lora(tp: int):
     """
     This test ensures we can run a single LoRA adapter on the TPU backend.
     We run "Username6568/Qwen2.5-3B-Instruct-1_plus_1_equals_1_adapter" which
     will force Qwen2.5-3B-Instruct to claim 1+1=1.
     """
 
-    llm = setup_vllm(1)
+    llm = setup_vllm(1, tp)
 
     prompt = "What is 1+1? \n"
 
@@ -63,7 +70,8 @@ def test_single_lora():
     assert int(answer) == 1
 
 
-def test_lora_hotswapping():
+@pytest.mark.parametrize("tp", TPU_TENSOR_PARALLEL_SIZES)
+def test_lora_hotswapping(tp: int):
     """
     This test ensures we can run multiple LoRA adapters on the TPU backend, even
     if we only have space to store 1.
@@ -79,7 +87,7 @@ def test_lora_hotswapping():
         for i in range(1, 5)
     ]
 
-    llm = setup_vllm(1)
+    llm = setup_vllm(1, tp)
 
     prompt = "What is 1+1? \n"
 
@@ -94,7 +102,8 @@ def test_lora_hotswapping():
         assert int(answer) == i + 1
 
 
-def test_multi_lora():
+@pytest.mark.parametrize("tp", TPU_TENSOR_PARALLEL_SIZES)
+def test_multi_lora(tp: int):
     """
     This test ensures we can run multiple LoRA adapters on the TPU backend, when
     we have enough space to store all of them.
@@ -109,7 +118,7 @@ def test_multi_lora():
         for i in range(1, 5)
     ]
 
-    llm = setup_vllm(4)
+    llm = setup_vllm(4, tp)
 
     prompt = "What is 1+1? \n"
 

From 881e1af43a1bb7b4bedd373e413eb7ad9dc9f920 Mon Sep 17 00:00:00 2001
From: Nick Hill <nhill@redhat.com>
Date: Fri, 1 Aug 2025 22:40:45 +0100
Subject: [PATCH 143/224] [BugFix] Harden distributed DP startup (#21538)

Signed-off-by: Nick Hill <nhill@redhat.com>
---
 vllm/utils/__init__.py        |  3 ++
 vllm/v1/engine/coordinator.py | 12 +++++++
 vllm/v1/engine/core.py        | 61 +++++++++++++++++++++++------------
 3 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 7405f3986df8d..0d3fa6b059beb 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -2794,6 +2794,9 @@ def make_zmq_socket(
     if linger is not None:
         socket.setsockopt(zmq.LINGER, linger)
 
+    if socket_type == zmq.XPUB:
+        socket.setsockopt(zmq.XPUB_VERBOSE, True)
+
     # Determine if the path is a TCP socket with an IPv6 address.
     # Enable IPv6 on the zmq socket if so.
     scheme, host, _ = split_zmq_path(path)
diff --git a/vllm/v1/engine/coordinator.py b/vllm/v1/engine/coordinator.py
index 440628576bcb7..8d8d1689e61e3 100644
--- a/vllm/v1/engine/coordinator.py
+++ b/vllm/v1/engine/coordinator.py
@@ -172,6 +172,18 @@ class DPCoordinatorProc:
                 bind=True,
         ) as publish_back:
 
+            # Wait until all engines subscribe.
+            for _ in self.engines:
+                if publish_back.recv() != b'\x01':
+                    logger.error(
+                        "DP Coordinator received unexpected message while "
+                        "waiting for engines to subscribe")
+                    return
+            # Send ready message to engines.
+            publish_back.send(b"READY")
+
+            logger.info("All engine subscriptions received by DP coordinator")
+
             poller = zmq.Poller()
             poller.register(publish_front, zmq.POLLIN)
             poller.register(output_back, zmq.POLLIN)
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 6ae5736df98b8..0a889b2a0a184 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -461,8 +461,11 @@ class EngineCoreProc(EngineCore):
             self.has_coordinator = addresses.coordinator_output is not None
             self.frontend_stats_publish_address = (
                 addresses.frontend_stats_publish_address)
+            logger.debug("Has DP Coordinator: %s, stats publish address: %s",
+                         self.has_coordinator,
+                         self.frontend_stats_publish_address)
             # Only publish request queue stats to coordinator for "internal"
-            # LB mode.
+            # and "hybrid" LB modes.
             self.publish_dp_lb_stats = (
                 self.has_coordinator
                 and not vllm_config.parallel_config.data_parallel_external_lb)
@@ -472,25 +475,38 @@ class EngineCoreProc(EngineCore):
             super().__init__(vllm_config, executor_class, log_stats,
                              executor_fail_callback)
 
+            # Background Threads and Queues for IO. These enable us to
+            # overlap ZMQ socket IO with GPU since they release the GIL,
+            # and to overlap some serialization/deserialization with the
+            # model forward pass.
+            # Threads handle Socket <-> Queues and core_busy_loop uses Queue.
+            ready_event = threading.Event()
+            input_thread = threading.Thread(target=self.process_input_sockets,
+                                            args=(addresses.inputs,
+                                                  addresses.coordinator_input,
+                                                  identity, ready_event),
+                                            daemon=True)
+            input_thread.start()
+
+            self.output_thread = threading.Thread(
+                target=self.process_output_sockets,
+                args=(addresses.outputs, addresses.coordinator_output,
+                      self.engine_index),
+                daemon=True)
+            self.output_thread.start()
+
+            # Don't complete handshake until DP coordinator ready message is
+            # received.
+            while not ready_event.wait(timeout=10):
+                if not input_thread.is_alive():
+                    raise RuntimeError(
+                        "Input socket thread died during startup")
+                assert addresses.coordinator_input is not None
+                logger.info("Waiting for READY message from DP Coordinator...")
+
         self.step_fn = (self.step if self.batch_queue is None else
                         self.step_with_batch_queue)
 
-        # Background Threads and Queues for IO. These enable us to
-        # overlap ZMQ socket IO with GPU since they release the GIL,
-        # and to overlap some serialization/deserialization with the
-        # model forward pass.
-        # Threads handle Socket <-> Queues and core_busy_loop uses Queue.
-        threading.Thread(target=self.process_input_sockets,
-                         args=(addresses.inputs, addresses.coordinator_input,
-                               identity),
-                         daemon=True).start()
-        self.output_thread = threading.Thread(
-            target=self.process_output_sockets,
-            args=(addresses.outputs, addresses.coordinator_output,
-                  self.engine_index),
-            daemon=True)
-        self.output_thread.start()
-
     @contextmanager
     def _perform_handshakes(
         self,
@@ -505,10 +521,10 @@ class EngineCoreProc(EngineCore):
 
         For DP=1 or offline mode, this is with the colocated front-end process.
 
-        For DP>1 with internal loadbalancing this is with the shared front-end
+        For DP>1 with internal load-balancing this is with the shared front-end
         process which may reside on a different node.
 
-        For DP>1 with external or hybrid loadbalancing, two handshakes are
+        For DP>1 with external or hybrid load-balancing, two handshakes are
         performed:
             - With the rank 0 front-end process which retrieves the
               DP Coordinator ZMQ addresses and DP process group address.
@@ -772,7 +788,7 @@ class EngineCoreProc(EngineCore):
 
     def process_input_sockets(self, input_addresses: list[str],
                               coord_input_address: Optional[str],
-                              identity: bytes):
+                              identity: bytes, ready_event: threading.Event):
         """Input socket IO thread."""
 
         # Msgpack serialization decoding.
@@ -809,9 +825,14 @@ class EngineCoreProc(EngineCore):
                 # back to us.
                 input_socket.send(b'')
                 poller.register(input_socket, zmq.POLLIN)
+
             if coord_socket is not None:
+                # Wait for ready message from coordinator.
+                assert coord_socket.recv() == b"READY"
                 poller.register(coord_socket, zmq.POLLIN)
 
+            ready_event.set()
+            del ready_event
             while True:
                 for input_socket, _ in poller.poll():
                     # (RequestType, RequestData)

From 88faa466d788e25082c02dc9688931d7976361f9 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Fri, 1 Aug 2025 19:18:38 -0400
Subject: [PATCH 144/224] [CI] Initial tests for SM100 Blackwell runner
 (#21877)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 .buildkite/test-pipeline.yaml                 | 24 ++++++++++++++++---
 tests/compile/test_fusion_all_reduce.py       | 15 +++++++-----
 .../quantization/test_cutlass_scaled_mm.py    |  5 ----
 3 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 598fd5762985e..cc1223d4c4653 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -647,13 +647,31 @@ steps:
 - label: Blackwell Test
   working_dir: "/vllm-workspace/"
   gpu: b200
-  optional: true
+  # optional: true
   source_file_dependencies:
-  - csrc/
-  - vllm/
+  - csrc/quantization/fp4/
+  - csrc/attention/mla/
+  - csrc/quantization/cutlass_w8a8/moe/
+  - vllm/model_executor/layers/fused_moe/cutlass_moe.py
+  - vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py
+  - vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py
+  - vllm/v1/attention/backends/flashinfer.py
+  - vllm/compilation/fusion.py
   commands:
     - nvidia-smi
     - python3 examples/offline_inference/basic/chat.py
+    # Attention
+    # num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353
+    - pytest -v -s tests/kernels/attention/test_flashinfer.py -k 'not num_heads2'
+    - pytest -v -s tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py
+    - pytest -v -s tests/kernels/test_cutlass_mla_decode.py
+    # Quantization
+    - pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8'
+    - pytest -v -s tests/kernels/quantization/test_nvfp4_quant.py
+    - pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
+    - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
+    # Fusion
+    - pytest -v -s tests/compile/test_fusion_all_reduce.py
 
 #####  1 GPU test  #####
 #####  multi gpus test  #####
diff --git a/tests/compile/test_fusion_all_reduce.py b/tests/compile/test_fusion_all_reduce.py
index b394e0035c689..4c3cf6c2a10cf 100644
--- a/tests/compile/test_fusion_all_reduce.py
+++ b/tests/compile/test_fusion_all_reduce.py
@@ -136,12 +136,15 @@ class TestAllReduceFusedAddRMSNormStaticQuantFP4Model(torch.nn.Module):
 
 
 @multi_gpu_test(num_gpus=2)
-@pytest.mark.parametrize("test_model", [
-    TestAllReduceRMSNormModel,
-    TestAllReduceFusedAddRMSNormModel,
-    TestAllReduceFusedAddRMSNormStaticQuantFP8Model,
-    TestAllReduceFusedAddRMSNormStaticQuantFP4Model,
-])
+@pytest.mark.parametrize(
+    "test_model",
+    [
+        TestAllReduceRMSNormModel,
+        TestAllReduceFusedAddRMSNormModel,
+        TestAllReduceFusedAddRMSNormStaticQuantFP8Model,
+        # TODO: Enable with torch==2.8.0
+        # TestAllReduceFusedAddRMSNormStaticQuantFP4Model,
+    ])
 @pytest.mark.parametrize("batch_size", [8])
 @pytest.mark.parametrize("seq_len", [8])
 @pytest.mark.parametrize("hidden_size", [16])
diff --git a/tests/kernels/quantization/test_cutlass_scaled_mm.py b/tests/kernels/quantization/test_cutlass_scaled_mm.py
index 544e6dc197904..8730eeaaa761c 100644
--- a/tests/kernels/quantization/test_cutlass_scaled_mm.py
+++ b/tests/kernels/quantization/test_cutlass_scaled_mm.py
@@ -559,8 +559,6 @@ def test_cutlass_fp8_group_gemm(num_experts: int, per_act_token: bool,
         m_a_scales = m_g if per_act_token else 1
         n_b_scales = n_g if per_out_ch else 1
 
-        print("shape:", m_g, n_g, k_g)
-
         # Create group-specific A and B (FP8) and output (FP16/FP32)
         a_g = to_fp8(torch.randn((m_g, k_g), device=device))
         b_g = to_fp8(torch.randn((n_g, k_g), device=device).t())
@@ -639,7 +637,4 @@ def test_cutlass_fp8_group_gemm(num_experts: int, per_act_token: bool,
     for g in range(num_experts):
         baseline = baseline_tensors[g]
         c = out_tensors_stacked[expert_offsets[g]:expert_offsets[g + 1]]
-        print(baseline)
-        print(c)
-        print("*")
         torch.testing.assert_close(c, baseline, rtol=1e-2, atol=5e-4)

From eefbf4a68b7b0a5b8364a59647906be1b7f043e2 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Fri, 1 Aug 2025 19:18:51 -0400
Subject: [PATCH 145/224] [Perf] Optimize `reshape_and_cache_flash` CUDA Kernel
 (#22036)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 .../benchmark_reshape_and_cache_flash.py      | 156 ++++++++++++++++++
 csrc/cache_kernels.cu                         |  92 ++++++++---
 2 files changed, 225 insertions(+), 23 deletions(-)
 create mode 100644 benchmarks/kernels/benchmark_reshape_and_cache_flash.py

diff --git a/benchmarks/kernels/benchmark_reshape_and_cache_flash.py b/benchmarks/kernels/benchmark_reshape_and_cache_flash.py
new file mode 100644
index 0000000000000..d4648c18f31d5
--- /dev/null
+++ b/benchmarks/kernels/benchmark_reshape_and_cache_flash.py
@@ -0,0 +1,156 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from __future__ import annotations
+
+import random
+import time
+
+import torch
+from tabulate import tabulate
+
+from vllm import _custom_ops as ops
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+from vllm.utils import (
+    STR_DTYPE_TO_TORCH_DTYPE,
+    FlexibleArgumentParser,
+    create_kv_caches_with_random_flash,
+)
+
+logger = init_logger(__name__)
+
+
+@torch.inference_mode()
+def run_benchmark(
+    num_tokens: int,
+    num_heads: int,
+    head_size: int,
+    block_size: int,
+    num_blocks: int,
+    dtype: torch.dtype,
+    kv_cache_dtype: str,
+    kv_cache_layout: str,
+    num_iters: int,
+    device: str = "cuda",
+) -> float:
+    """Return latency (seconds) for given num_tokens."""
+
+    if kv_cache_dtype == "fp8" and head_size % 16:
+        raise ValueError("fp8 kv-cache requires head_size to be a multiple of 16.")
+
+    current_platform.seed_everything(42)
+    torch.set_default_device(device)
+
+    # create random key / value tensors [T, H, D].
+    key = torch.randn(num_tokens, num_heads, head_size, dtype=dtype, device=device)
+    value = torch.randn_like(key)
+
+    # prepare the slot mapping.
+    # each token is assigned a unique slot in the KV-cache.
+    num_slots = block_size * num_blocks
+    if num_tokens > num_slots:
+        raise ValueError("num_tokens cannot exceed the total number of cache slots")
+    slot_mapping_lst = random.sample(range(num_slots), num_tokens)
+    slot_mapping = torch.tensor(slot_mapping_lst, dtype=torch.long, device=device)
+
+    key_caches, value_caches = create_kv_caches_with_random_flash(
+        num_blocks,
+        block_size,
+        1,  # num_layers
+        num_heads,
+        head_size,
+        kv_cache_dtype,
+        dtype,
+        device=device,
+        cache_layout=kv_cache_layout,
+    )
+    key_cache, value_cache = key_caches[0], value_caches[0]
+
+    # compute per-kernel scaling factors for fp8 conversion (if used).
+    k_scale = (key.amax() / 64.0).to(torch.float32)
+    v_scale = (value.amax() / 64.0).to(torch.float32)
+
+    def run_cuda_benchmark(n_iters: int) -> float:
+        nonlocal key, value, key_cache, value_cache, slot_mapping
+        torch.cuda.synchronize()
+        start = time.perf_counter()
+        for _ in range(n_iters):
+            ops.reshape_and_cache_flash(
+                key,
+                value,
+                key_cache,
+                value_cache,
+                slot_mapping,
+                kv_cache_dtype,
+                k_scale,
+                v_scale,
+            )
+        torch.cuda.synchronize()
+        end = time.perf_counter()
+        return (end - start) / n_iters
+
+    # warm-up
+    run_cuda_benchmark(3)
+
+    lat = run_cuda_benchmark(num_iters)
+
+    # free tensors to mitigate OOM when sweeping
+    del key, value, key_cache, value_cache, slot_mapping
+    torch.cuda.empty_cache()
+
+    return lat
+
+
+def main(args):
+    rows = []
+    for layout in ["NHD", "HND"]:
+        for exp in range(1, 17):
+            n_tok = 2**exp
+            lat = run_benchmark(
+                num_tokens=n_tok,
+                num_heads=args.num_heads,
+                head_size=args.head_size,
+                block_size=args.block_size,
+                num_blocks=args.num_blocks,
+                dtype=STR_DTYPE_TO_TORCH_DTYPE[args.dtype],
+                kv_cache_dtype=args.kv_cache_dtype,
+                kv_cache_layout=layout,
+                num_iters=args.iters,
+                device="cuda",
+            )
+            rows.append([n_tok, layout, f"{lat * 1e6:.3f}"])
+
+    print(tabulate(rows, headers=["num_tokens", "layout", "latency (µs)"]))
+
+
+if __name__ == "__main__":
+    parser = FlexibleArgumentParser()
+
+    parser.add_argument("--num-heads", type=int, default=128)
+    parser.add_argument(
+        "--head-size",
+        type=int,
+        choices=[64, 80, 96, 112, 120, 128, 192, 256],
+        default=128,
+    )
+    parser.add_argument("--block-size", type=int, choices=[16, 32], default=16)
+    parser.add_argument("--num-blocks", type=int, default=128 * 512)
+
+    parser.add_argument(
+        "--dtype",
+        type=str,
+        choices=["half", "bfloat16", "float"],
+        default="bfloat16",
+    )
+
+    parser.add_argument(
+        "--kv-cache-dtype",
+        type=str,
+        choices=["auto", "fp8"],
+        default="auto",
+    )
+
+    parser.add_argument("--iters", type=int, default=100)
+    args = parser.parse_args()
+
+    main(args)
diff --git a/csrc/cache_kernels.cu b/csrc/cache_kernels.cu
index 88559c8fe7183..131dcb15cd7e9 100644
--- a/csrc/cache_kernels.cu
+++ b/csrc/cache_kernels.cu
@@ -5,6 +5,7 @@
 #include "cuda_utils.h"
 #include "cuda_compat.h"
 #include "dispatch_utils.h"
+#include "quantization/vectorization_utils.cuh"
 
 #ifdef USE_ROCM
   #include "quantization/fp8/amd/quant_utils.cuh"
@@ -261,14 +262,26 @@ __global__ void reshape_and_cache_kernel(
   }
 }
 
+// Used by vectorization_utils to copy/convert one element
+template <typename OutT, typename InT, Fp8KVCacheDataType kv_dt>
+struct CopyWithScaleOp {
+  float scale;
+
+  __device__ __forceinline__ void operator()(OutT& dst, const InT src) const {
+    if constexpr (kv_dt == Fp8KVCacheDataType::kAuto) {
+      dst = static_cast<OutT>(src);
+    } else {
+      dst = fp8::scaled_convert<OutT, InT, kv_dt>(src, scale);
+    }
+  }
+};
+
 template <typename scalar_t, typename cache_t, Fp8KVCacheDataType kv_dt>
 __global__ void reshape_and_cache_flash_kernel(
     const scalar_t* __restrict__ key,    // [num_tokens, num_heads, head_size]
     const scalar_t* __restrict__ value,  // [num_tokens, num_heads, head_size]
-    cache_t* __restrict__ key_cache,     // [num_blocks, block_size, num_heads,
-                                         // head_size]
-    cache_t* __restrict__ value_cache,   // [num_blocks, block_size, num_heads,
-                                         // head_size]
+    cache_t* __restrict__ key_cache,     // NHD or HND, shape see comments below
+    cache_t* __restrict__ value_cache,   // same as above
     const int64_t* __restrict__ slot_mapping,  // [num_tokens]
     const int64_t block_stride, const int64_t page_stride,
     const int64_t head_stride, const int64_t key_stride,
@@ -282,25 +295,58 @@ __global__ void reshape_and_cache_flash_kernel(
   }
   const int64_t block_idx = slot_idx / block_size;
   const int64_t block_offset = slot_idx % block_size;
-  const int n = num_heads * head_size;
-  for (int i = threadIdx.x; i < n; i += blockDim.x) {
-    const int64_t src_key_idx = token_idx * key_stride + i;
-    const int64_t src_value_idx = token_idx * value_stride + i;
-    const int head_idx = i / head_size;
-    const int head_offset = i % head_size;
-    const int64_t tgt_key_value_idx = block_idx * block_stride +
-                                      block_offset * page_stride +
-                                      head_idx * head_stride + head_offset;
-    scalar_t tgt_key = key[src_key_idx];
-    scalar_t tgt_value = value[src_value_idx];
-    if constexpr (kv_dt == Fp8KVCacheDataType::kAuto) {
-      key_cache[tgt_key_value_idx] = tgt_key;
-      value_cache[tgt_key_value_idx] = tgt_value;
-    } else {
-      key_cache[tgt_key_value_idx] =
-          fp8::scaled_convert<cache_t, scalar_t, kv_dt>(tgt_key, *k_scale);
-      value_cache[tgt_key_value_idx] =
-          fp8::scaled_convert<cache_t, scalar_t, kv_dt>(tgt_value, *v_scale);
+  const int n_elems = num_heads * head_size;
+
+  // pointers to the beginning of the source row for this token.
+  const scalar_t* __restrict__ key_src = key + token_idx * key_stride;
+  const scalar_t* __restrict__ value_src = value + token_idx * value_stride;
+
+  // find the start position inside the kv-cache for this token.
+  cache_t* __restrict__ key_dst =
+      key_cache + block_idx * block_stride + block_offset * page_stride;
+  cache_t* __restrict__ value_dst =
+      value_cache + block_idx * block_stride + block_offset * page_stride;
+
+  // this is true for the NHD layout where `head_stride == head_size`
+  const bool is_contiguous_heads = (head_stride == head_size);
+
+  float k_scale_val = (kv_dt == Fp8KVCacheDataType::kAuto) ? 0.f : *k_scale;
+  float v_scale_val = (kv_dt == Fp8KVCacheDataType::kAuto) ? 0.f : *v_scale;
+  constexpr int VEC_SIZE = (sizeof(scalar_t) == 2) ? 8 : 4;
+  CopyWithScaleOp<cache_t, scalar_t, kv_dt> k_op{k_scale_val};
+  CopyWithScaleOp<cache_t, scalar_t, kv_dt> v_op{v_scale_val};
+  if (is_contiguous_heads) {
+    // NHD layout
+    // kv cache: [num_blocks, block_size, num_heads, head_size]
+    vectorize_with_alignment<VEC_SIZE>(key_src, key_dst, n_elems, threadIdx.x,
+                                       blockDim.x, k_op);
+
+    vectorize_with_alignment<VEC_SIZE>(value_src, value_dst, n_elems,
+                                       threadIdx.x, blockDim.x, v_op);
+
+  } else {
+    // HND layout: heads are strided, but each head_size segment is contiguous
+    // kv cache: [num_blocks, num_heads, block_size, head_size]
+    const int lane = threadIdx.x & 31;     // 0..31 within warp
+    const int warp_id = threadIdx.x >> 5;  // warp index within block
+    const int warps_per_block = blockDim.x >> 5;
+
+    for (int head = warp_id; head < num_heads; head += warps_per_block) {
+      const scalar_t* __restrict__ k_src_h = key_src + head * head_size;
+      const scalar_t* __restrict__ v_src_h = value_src + head * head_size;
+
+      cache_t* __restrict__ k_dst_h =
+          key_dst + static_cast<int64_t>(head) * head_stride;
+      cache_t* __restrict__ v_dst_h =
+          value_dst + static_cast<int64_t>(head) * head_stride;
+
+      // within each head, let the 32 threads of the warp perform the vector
+      // copy
+      vectorize_with_alignment<VEC_SIZE>(k_src_h, k_dst_h, head_size, lane, 32,
+                                         k_op);
+
+      vectorize_with_alignment<VEC_SIZE>(v_src_h, v_dst_h, head_size, lane, 32,
+                                         v_op);
     }
   }
 }

From 3654847db5a9b9a0955f8416292d94fa1c827f77 Mon Sep 17 00:00:00 2001
From: JartX <sagformas@gmail.com>
Date: Sat, 2 Aug 2025 03:12:19 +0200
Subject: [PATCH 146/224] feat: Add Support GPTQ Quantization MOE on ROCM vllm
 serve (#21733)

---
 .../layers/fused_moe/fused_moe.py             |  4 ++--
 .../layers/quantization/gptq.py               | 22 ++++++++++++++++---
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index b69575c7e96de..56d1dfe135b3b 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -761,8 +761,8 @@ def get_moe_wna16_block_config(config: dict[str,
 
 def should_moe_wna16_use_cuda(num_valid_tokens: int, group_size: int,
                               num_experts: int, bit: int):
-    return bit == 4 and group_size in [32, 64, 128] and \
-        num_valid_tokens / num_experts <= 6
+    return current_platform.is_cuda() and bit == 4 and \
+        group_size in [32, 64, 128] and num_valid_tokens / num_experts <= 6
 
 
 def get_default_config(
diff --git a/vllm/model_executor/layers/quantization/gptq.py b/vllm/model_executor/layers/quantization/gptq.py
index d3ab1be3bee01..f18c936bac605 100644
--- a/vllm/model_executor/layers/quantization/gptq.py
+++ b/vllm/model_executor/layers/quantization/gptq.py
@@ -10,10 +10,11 @@ import torch
 from torch.nn.parameter import Parameter
 
 from vllm import _custom_ops as ops
+from vllm.model_executor.layers.fused_moe.layer import FusedMoE
 from vllm.model_executor.layers.linear import LinearMethodBase
 from vllm.model_executor.layers.quantization import QuantizationMethods
 from vllm.model_executor.layers.quantization.base_config import (
-    QuantizationConfig)
+    QuantizationConfig, QuantizeMethodBase)
 from vllm.model_executor.layers.quantization.utils.gptq_utils import (
     get_linear_quant_method)
 from vllm.model_executor.parameter import (ChannelQuantScaleParameter,
@@ -110,8 +111,23 @@ class GPTQConfig(QuantizationConfig):
         return cls(weight_bits, group_size, desc_act, lm_head_quantized,
                    dynamic)
 
-    def get_quant_method(self, layer: torch.nn.Module,
-                         prefix: str) -> Optional["GPTQLinearMethod"]:
+    def get_quant_method(
+        self, layer: torch.nn.Module, prefix: str
+    ) -> Optional[Union["GPTQLinearMethod", "QuantizeMethodBase"]]:
+        if isinstance(layer, FusedMoE):
+            # GPTQ MoE support: fall back to MoeWNA16 for broad compatibility
+            from .moe_wna16 import MoeWNA16Config
+
+            config = {
+                "quant_method": "gptq",
+                "bits": self.weight_bits,
+                "group_size": self.group_size,
+                "sym": True,  # GPTQ typically uses symmetric quantization
+                "lm_head": False,
+            }
+            return MoeWNA16Config.from_config(config).get_quant_method(
+                layer, prefix)
+
         return get_linear_quant_method(self, layer, prefix, GPTQLinearMethod)
 
 

From 23322431c802bb1057426c7ca31b22e859b51644 Mon Sep 17 00:00:00 2001
From: fhl2000 <63384265+fhl2000@users.noreply.github.com>
Date: Sat, 2 Aug 2025 09:49:34 +0800
Subject: [PATCH 147/224] [V1][CUDA] Full cudagraph support for FlashInfer
 (#21367)

---
 vllm/v1/attention/backends/flash_attn.py      |   7 +-
 vllm/v1/attention/backends/flashinfer.py      | 355 ++++++++++++++++--
 vllm/v1/attention/backends/mla/flashmla.py    |   4 +-
 .../attention/backends/mla/rocm_aiter_mla.py  |   4 +-
 vllm/v1/attention/backends/triton_attn.py     |   6 +-
 vllm/v1/attention/backends/utils.py           |  18 +-
 vllm/v1/worker/gpu_model_runner.py            |  24 +-
 vllm/v1/worker/gpu_worker.py                  |   5 +
 8 files changed, 376 insertions(+), 47 deletions(-)

diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index 3f9afa67aef70..f086bab2556eb 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -25,7 +25,8 @@ if is_flash_attn_varlen_func_available():
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
 from vllm.utils import cdiv
-from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
+from vllm.v1.attention.backends.utils import (AttentionCGSupport,
+                                              AttentionMetadataBuilder,
                                               CommonAttentionMetadata,
                                               get_kv_cache_layout)
 from vllm.v1.kv_cache_interface import AttentionSpec
@@ -153,7 +154,9 @@ def _get_sliding_window_configs(
 
 class FlashAttentionMetadataBuilder(
         AttentionMetadataBuilder[FlashAttentionMetadata]):
-    full_cudagraph_supported: ClassVar[bool] = get_flash_attn_version() == 3
+    attn_cudagraph_support: ClassVar[AttentionCGSupport] = \
+        AttentionCGSupport.NEVER if get_flash_attn_version() == 2 \
+        else AttentionCGSupport.ALWAYS
 
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                  vllm_config: VllmConfig, device: torch.device):
diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index f8af1d7e41831..0aaad02b5b840 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -4,26 +4,28 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, ClassVar, Optional, Union
 
 import torch
 from flashinfer import (BatchDecodeWithPagedKVCacheWrapper,
                         BatchPrefillWithPagedKVCacheWrapper,
                         MultiLevelCascadeAttentionWrapper)
-from flashinfer.decode import trtllm_batch_decode_with_kv_cache
+from flashinfer.decode import (_get_range_buf, get_seq_lens,
+                               trtllm_batch_decode_with_kv_cache)
 
 import vllm.envs as envs
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionType)
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.utils import cdiv
+from vllm.utils import cdiv, is_pin_memory_available
 from vllm.utils.flashinfer import use_trtllm_decode_attention
 from vllm.v1.attention.backends.flash_attn import use_cascade_attention
 from vllm.v1.attention.backends.utils import (
-    AttentionMetadataBuilder, CommonAttentionMetadata, get_kv_cache_layout,
-    get_per_layer_parameters, infer_global_hyperparameters,
-    reorder_batch_to_split_decodes_and_prefills, split_decodes_and_prefills)
+    AttentionCGSupport, AttentionMetadataBuilder, CommonAttentionMetadata,
+    get_kv_cache_layout, get_per_layer_parameters,
+    infer_global_hyperparameters, reorder_batch_to_split_decodes_and_prefills,
+    split_decodes_and_prefills)
 from vllm.v1.kv_cache_interface import AttentionSpec
 
 if TYPE_CHECKING:
@@ -174,26 +176,66 @@ class FlashInferMetadata:
 
 
 class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
+    attn_cudagraph_support: ClassVar[AttentionCGSupport] = \
+        AttentionCGSupport.PURE_DECODE_ONLY
 
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                  vllm_config: VllmConfig, device: torch.device):
         self.device = device
+        self.vllm_config = vllm_config
+        self.cache_config = vllm_config.cache_config
+        self.kv_cache_spec = kv_cache_spec
         self._workspace_buffer = None
         self._prefill_wrapper = None  # Wrapper for prefill/append
-        self._decode_wrapper = None  # Wrapper for decode
+        self._decode_wrapper = None  # Wrapper for decode (general shape)
+
+        self.compilation_config = vllm_config.compilation_config
+        max_num_pages_per_req = cdiv(vllm_config.model_config.max_model_len,
+                                     self.kv_cache_spec.block_size)
+        max_num_reqs = vllm_config.scheduler_config.max_num_seqs
+        max_num_pages = max_num_reqs * max_num_pages_per_req
+        self.enable_cuda_graph = self.compilation_config.full_cuda_graph
+        if self.enable_cuda_graph:
+            # For full cudagraph capture, one `decode_wrapper` for each batch
+            # size is needed for FlashInfer.
+            self._decode_wrappers_cudagraph: dict[
+                int, BatchDecodeWithPagedKVCacheWrapper] = {}
+            self._decode_cudagraph_max_bs = min(
+                max_num_reqs, self.compilation_config.max_capture_size)
+
         self._cascade_wrapper = None  # Wrapper for cascade attention
 
         # Global hyperparameters shared by all attention layers
         self.global_hyperparameters = infer_global_hyperparameters(
             get_per_layer_parameters(vllm_config, layer_names, FlashInferImpl))
 
-        self.vllm_config = vllm_config
-        self.cache_config = vllm_config.cache_config
-        self.kv_cache_spec = kv_cache_spec
-        max_num_blocks_per_request = cdiv(
-            vllm_config.model_config.max_model_len,
-            self.kv_cache_spec.block_size)
-        self.block_table_arange = torch.arange(max_num_blocks_per_request,
+        # Preparing persistent buffers (device-side)
+        self.paged_kv_indptr = torch.zeros(max_num_reqs + 1,
+                                           dtype=torch.int32,
+                                           device=self.device)
+        self.paged_kv_indices = torch.zeros(
+            max_num_pages,  # max num pages possible
+            dtype=torch.int32,
+            device=self.device)
+        self.paged_kv_last_page_len = torch.zeros(max_num_reqs,
+                                                  dtype=torch.int32,
+                                                  device=self.device)
+        # host-side buffer
+        pin_memory = is_pin_memory_available()
+        self.paged_kv_indptr_cpu = torch.zeros(max_num_reqs + 1,
+                                               dtype=torch.int32,
+                                               device="cpu",
+                                               pin_memory=pin_memory)
+        self.paged_kv_indices_cpu = torch.zeros(max_num_pages,
+                                                dtype=torch.int32,
+                                                device="cpu",
+                                                pin_memory=pin_memory)
+        self.paged_kv_last_page_len_cpu = torch.zeros(max_num_reqs,
+                                                      dtype=torch.int32,
+                                                      device="cpu",
+                                                      pin_memory=pin_memory)
+
+        self.block_table_arange = torch.arange(max_num_pages_per_req,
                                                dtype=torch.int32,
                                                device=self.device)
 
@@ -217,8 +259,16 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
                 self._get_workspace_buffer(), get_kv_cache_layout())
         return self._prefill_wrapper
 
-    def _get_decode_wrapper(self):
-        if self._decode_wrapper is None:
+    def _get_decode_wrapper(self,
+                            batch_size: int,
+                            use_cudagraph: bool = False):
+        if use_cudagraph:
+            decode_wrapper = self._decode_wrappers_cudagraph.get(
+                batch_size, None)
+        else:
+            decode_wrapper = self._decode_wrapper
+
+        if decode_wrapper is None:
             num_qo_heads = (
                 self.vllm_config.model_config.get_num_attention_heads(
                     self.vllm_config.parallel_config))
@@ -226,11 +276,32 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
                 self.vllm_config.parallel_config)
             use_tensor_cores = envs.VLLM_FLASHINFER_FORCE_TENSOR_CORES or (
                 num_qo_heads // num_kv_heads > 4)
-            self._decode_wrapper = BatchDecodeWithPagedKVCacheWrapper(
+
+            if use_cudagraph:
+                paged_kv_indptr = self.paged_kv_indptr[:batch_size + 1]
+                paged_kv_indices = self.paged_kv_indices
+                paged_kv_last_page_len = self.paged_kv_last_page_len[:
+                                                                     batch_size]
+            else:
+                paged_kv_indptr = None
+                paged_kv_indices = None
+                paged_kv_last_page_len = None
+            decode_wrapper = BatchDecodeWithPagedKVCacheWrapper(
                 self._get_workspace_buffer(),
                 get_kv_cache_layout(),
+                use_cuda_graph=use_cudagraph,
+                paged_kv_indptr_buffer=paged_kv_indptr,
+                paged_kv_indices_buffer=paged_kv_indices,
+                paged_kv_last_page_len_buffer=paged_kv_last_page_len,
                 use_tensor_cores=use_tensor_cores)
-        return self._decode_wrapper
+
+            # save the decode wrapper
+            if use_cudagraph:
+                self._decode_wrappers_cudagraph[batch_size] = decode_wrapper
+            else:
+                self._decode_wrapper = decode_wrapper
+
+        return decode_wrapper
 
     def _get_cascade_wrapper(self):
         if self._cascade_wrapper is None:
@@ -308,16 +379,44 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
                 )
 
             if num_decodes > 0:
-                attn_metadata.decode_wrapper = self._get_decode_wrapper()
+                pure_decode = num_prefills == 0
+                # possible required padding for cudagraph replay
+                use_cudagraph = (self.enable_cuda_graph and pure_decode and
+                                 num_decodes <= self._decode_cudagraph_max_bs)
+                if use_cudagraph:
+                    num_input_tokens = (
+                        self.vllm_config.pad_for_cudagraph(num_decodes))
+                    # Carefully fill the padding region with reasonable values
+                    # on cpu.
+                    # Make sure paged_kv_indptr_cpu is not decreasing
+                    self.paged_kv_indptr_cpu[1 + num_decodes:1 +
+                                             num_input_tokens].fill_(
+                                                 attn_metadata.
+                                                 paged_kv_indptr_cpu[-1])
+                    # Fill the remaining paged_kv_last_page_len_cpu with 1.
+                    # This is because flashinfer treats 0 as a full page
+                    # instead of empty.
+                    self.paged_kv_last_page_len_cpu[
+                        num_decodes:num_input_tokens].fill_(1)
+
+                else:
+                    num_input_tokens = num_decodes
+
+                attn_metadata.decode_wrapper = self._get_decode_wrapper(
+                    num_input_tokens, use_cudagraph)
                 if not use_trtllm_decode_attention(
                         num_decodes, attn_metadata.max_seq_len,
                         self.cache_config.cache_dtype,
                         attn_metadata.num_qo_heads, attn_metadata.num_kv_heads,
                         attn_metadata.head_dim):
-                    attn_metadata.decode_wrapper.plan(
-                        attn_metadata.paged_kv_indptr_cpu[:num_decodes + 1],
+                    # Use the persistent buffer with padding length,
+                    # instead of the same address but chunked version
+                    # in attn_metadata when using cudagraph.
+                    fast_plan_decode(
+                        attn_metadata.decode_wrapper,
+                        self.paged_kv_indptr_cpu[:num_input_tokens + 1],
                         attn_metadata.paged_kv_indices,
-                        attn_metadata.paged_kv_last_page_len_cpu[:num_decodes],
+                        self.paged_kv_last_page_len_cpu[:num_input_tokens],
                         attn_metadata.num_qo_heads,
                         attn_metadata.num_kv_heads,
                         attn_metadata.head_dim,
@@ -336,6 +435,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
               common_prefix_len: int,
               common_attn_metadata: CommonAttentionMetadata,
               fast_build: bool = False) -> FlashInferMetadata:
+        num_reqs = common_attn_metadata.num_reqs
         num_actual_tokens = common_attn_metadata.num_actual_tokens
         num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens =\
             split_decodes_and_prefills(common_attn_metadata)
@@ -381,18 +481,26 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
                                                        non_blocking=True)
         mask = (self.block_table_arange[:max_num_blocks].unsqueeze(0)
                 < block_table_bounds.unsqueeze(1))
-        paged_kv_indices = block_table_tensor[:, :max_num_blocks][mask]
+        # write self.paged_kv_indices inplace
+        num_actual_pages = torch.sum(mask)
+        paged_kv_indices = self.paged_kv_indices[:num_actual_pages]
+        torch.masked_select(block_table_tensor[:, :max_num_blocks],
+                            mask,
+                            out=paged_kv_indices)
 
-        paged_kv_indptr_cpu = torch.zeros(len(block_table_bounds_cpu) + 1,
-                                          dtype=torch.int32,
-                                          device='cpu')
-        paged_kv_indptr_cpu[1:] = block_table_bounds_cpu.cumsum(
-            dim=0, dtype=torch.int32)
+        # write self.paged_kv_indptr_cpu inplace (0-index is always 0)
+        torch.cumsum(block_table_bounds_cpu,
+                     dim=0,
+                     dtype=torch.int32,
+                     out=self.paged_kv_indptr_cpu[1:1 + num_reqs])
 
         paged_kv_last_page_len_cpu = seq_lens_cpu % page_size
-        paged_kv_last_page_len_cpu = torch.where(
-            paged_kv_last_page_len_cpu == 0, page_size,
-            paged_kv_last_page_len_cpu)
+        # write self.paged_kv_last_page_len_cpu inplace
+        torch.where(paged_kv_last_page_len_cpu == 0,
+                    torch.tensor(page_size),
+                    paged_kv_last_page_len_cpu,
+                    out=self.paged_kv_last_page_len_cpu[:num_reqs])
+
         cache_dtype = self.cache_config.cache_dtype
         if cache_dtype.startswith("fp8"):
             kv_cache_dtype = FlashInferBackend.get_fp8_dtype_for_flashinfer(
@@ -402,9 +510,10 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
         attn_metadata = FlashInferMetadata(
             num_actual_tokens=num_actual_tokens,
             qo_indptr_cpu=common_attn_metadata.query_start_loc_cpu,
-            paged_kv_indptr_cpu=paged_kv_indptr_cpu,
+            paged_kv_indptr_cpu=self.paged_kv_indptr_cpu[:1 + num_reqs],
             paged_kv_indices=paged_kv_indices,
-            paged_kv_last_page_len_cpu=paged_kv_last_page_len_cpu,
+            paged_kv_last_page_len_cpu=self.
+            paged_kv_last_page_len_cpu[:num_reqs],
             num_qo_heads=self.vllm_config.model_config.get_num_attention_heads(
                 self.vllm_config.parallel_config),
             num_kv_heads=self.kv_cache_spec.num_kv_heads,
@@ -431,6 +540,26 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
 
         return attn_metadata
 
+    def build_for_cudagraph_capture(
+            self, common_attn_metadata: CommonAttentionMetadata):
+        """
+        This method builds the metadata for full cudagraph capture.
+        Currently, only decode is supported for full cudagraphs with FlashInfer.
+        """
+        m = common_attn_metadata
+
+        assert m.num_reqs == m.num_actual_tokens, \
+            "FlashInfer only supports decode-only full CUDAGraph capture. " \
+            "Make sure all cudagraph capture sizes <= max_num_seq."
+
+        m.max_query_len = 1  # decode-only
+
+        return self.build(0, m)
+
+    def can_run_in_cudagraph(
+            self, common_attn_metadata: CommonAttentionMetadata) -> bool:
+        return common_attn_metadata.max_query_len == 1
+
     def use_cascade_attention(self, *args, **kwargs) -> bool:
         if self.kv_cache_spec.dtype != self.vllm_config.model_config.dtype:
             # TODO: The cascade wrapper currently does not support setting
@@ -638,3 +767,163 @@ class FlashInferImpl(AttentionImpl):
                         out=output[:num_decode_tokens],
                     )
         return output_padded
+
+
+def fast_plan_decode(
+    self,  # decode wrapper
+    indptr_cpu: torch.Tensor,
+    indices: torch.Tensor,
+    last_page_len_cpu: torch.Tensor,
+    num_qo_heads: int,
+    num_kv_heads: int,
+    head_dim: int,
+    page_size: int,
+    pos_encoding_mode: str = "NONE",
+    window_left: int = -1,
+    logits_soft_cap: Optional[float] = None,
+    q_data_type: Optional[Union[str, torch.dtype]] = "float16",
+    kv_data_type: Optional[Union[str, torch.dtype]] = None,
+    data_type: Optional[Union[str, torch.dtype]] = None,
+    sm_scale: Optional[float] = None,
+    rope_scale: Optional[float] = None,
+    rope_theta: Optional[float] = None,
+    non_blocking: bool = True,
+) -> None:
+    """
+    A faster version of BatchDecodeWithPagedKVCacheWrapper::plan used for
+    cudagraph capture/replay; the non-cudagraph path falls back to the
+    original plan.
+    Using the original plan after passing host-side buffers:
+    - only host-to-device copy of indptr and last_page_len buffers.
+    Modifications for cudagraph:
+    - only host-to-device copy of indptr and last_page_len buffers.
+    - avoid device-to-device copy of indices buffer.
+
+    Parts of the code are inspired by the original plan in the FlashInfer
+    repo and by the fast_decode_plan implementation in the SGLang repo.
+    """
+    # Warm up with the original plan on the first call, and always run the
+    # original plan if we run for dynamic shape. For fixed shape (cudagraph),
+    # this warm up is to generate the _cached_module for the decode wrapper.
+    if not self.is_cuda_graph_enabled or \
+        getattr(self, "vllm_first_call", True):
+        self.plan(
+            indptr_cpu,
+            indices,
+            last_page_len_cpu,
+            num_qo_heads,
+            num_kv_heads,
+            head_dim,
+            page_size,
+            pos_encoding_mode,
+            window_left,
+            logits_soft_cap,
+            q_data_type,
+            kv_data_type,
+            data_type,
+            sm_scale,
+            rope_scale,
+            rope_theta,
+            non_blocking,
+        )
+        self.vllm_first_call = False
+        return
+
+    assert self.is_cuda_graph_enabled, "Should be cudagraph only here"
+
+    batch_size = len(last_page_len_cpu)
+    if logits_soft_cap is None:
+        logits_soft_cap = 0.0
+
+    # Handle data types consistently
+    if data_type is not None:
+        if q_data_type is None:
+            q_data_type = data_type
+        if kv_data_type is None:
+            kv_data_type = data_type
+    elif q_data_type is None:
+        q_data_type = "float16"
+
+    if kv_data_type is None:
+        kv_data_type = q_data_type
+    q_data_type = getattr(torch, q_data_type) if isinstance(
+        q_data_type, str) else q_data_type
+    kv_data_type = getattr(torch, kv_data_type) if isinstance(
+        kv_data_type, str) else kv_data_type
+
+    if self.use_tensor_cores:
+        qo_indptr_host = _get_range_buf(batch_size + 1, "cpu")
+
+    if batch_size != self._fixed_batch_size:
+        raise ValueError(
+            "The batch size should be fixed in cudagraph mode, the runtime "
+            "batch size {} mismatches the batch size set during "
+            "initialization {}".format(batch_size, self._fixed_batch_size))
+    if len(indices) > len(self._paged_kv_indices_buf):
+        raise ValueError(
+            "The size of indices should be less than or equal to the "
+            "allocated buffer")
+
+    # host-to-device copy for the indptr buffer
+    self._paged_kv_indptr_buf.copy_(indptr_cpu, non_blocking=True)
+    # host-to-device copy for the last_page_len buffer
+    self._paged_kv_last_page_len_buf.copy_(last_page_len_cpu,
+                                           non_blocking=True)
+
+    indptr_host = indptr_cpu
+    last_page_len_host = last_page_len_cpu
+
+    if self.use_tensor_cores:
+        kv_lens_arr_host = get_seq_lens(indptr_host, last_page_len_host,
+                                        page_size)
+
+        try:
+            # Make sure we pass exactly 15 arguments for tensor core version
+            self._plan_info = self._cached_module.plan(
+                self._float_workspace_buffer,
+                self._int_workspace_buffer,
+                self._pin_memory_int_workspace_buffer,
+                qo_indptr_host,
+                indptr_host,
+                kv_lens_arr_host,
+                batch_size,  # total_num_rows
+                batch_size,
+                num_qo_heads,
+                num_kv_heads,
+                page_size,
+                self.is_cuda_graph_enabled,
+                head_dim,
+                head_dim,
+                False,  # causal
+            )
+        except Exception as e:
+            raise RuntimeError(f"Error in tensor core plan: {e}") from e
+    else:
+        try:
+            # Make sure we pass exactly 15 arguments for standard version
+            self._plan_info = self._cached_module.plan(
+                self._float_workspace_buffer,
+                self._int_workspace_buffer,
+                self._pin_memory_int_workspace_buffer,
+                indptr_host,
+                batch_size,
+                num_qo_heads,
+                num_kv_heads,
+                page_size,
+                self.is_cuda_graph_enabled,
+                window_left,
+                logits_soft_cap,
+                head_dim,
+                head_dim,
+                torch.empty(0, dtype=q_data_type),
+                torch.empty(0, dtype=kv_data_type),
+            )
+        except Exception as e:
+            raise RuntimeError(f"Error in standard plan: {e}") from e
+
+    self._pos_encoding_mode = pos_encoding_mode
+    self._window_left = window_left
+    self._logits_soft_cap = logits_soft_cap
+    self._sm_scale = sm_scale
+    self._rope_scale = rope_scale
+    self._rope_theta = rope_theta
diff --git a/vllm/v1/attention/backends/mla/flashmla.py b/vllm/v1/attention/backends/mla/flashmla.py
index 39463b9c06164..b5aecff9937f3 100644
--- a/vllm/v1/attention/backends/mla/flashmla.py
+++ b/vllm/v1/attention/backends/mla/flashmla.py
@@ -18,6 +18,7 @@ from vllm.v1.attention.backends.mla.common import (MLACommonBackend,
                                                    MLACommonImpl,
                                                    MLACommonMetadata,
                                                    MLACommonMetadataBuilder)
+from vllm.v1.attention.backends.utils import AttentionCGSupport
 from vllm.v1.kv_cache_interface import AttentionSpec
 
 logger = init_logger(__name__)
@@ -54,7 +55,8 @@ class FlashMLAMetadata(MLACommonMetadata[FlashMLADecodeMetadata]):
 
 
 class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
-    full_cudagraph_supported: ClassVar[bool] = True  # Decode-only
+    attn_cudagraph_support: ClassVar[AttentionCGSupport] = \
+        AttentionCGSupport.PURE_DECODE_ONLY
 
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                  vllm_config: VllmConfig, device: torch.device):
diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
index 5c5891f035ae2..8b55e1a301992 100644
--- a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
+++ b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
@@ -17,6 +17,7 @@ from vllm.v1.attention.backends.mla.common import (MLACommonBackend,
                                                    MLACommonImpl,
                                                    MLACommonMetadata,
                                                    MLACommonMetadataBuilder)
+from vllm.v1.attention.backends.utils import AttentionCGSupport
 from vllm.v1.kv_cache_interface import AttentionSpec
 
 # yapf: enable
@@ -64,7 +65,8 @@ class AiterMLAMetadata(MLACommonMetadata[AiterMLADecodeMetadata]):
 
 
 class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
-    full_cudagraph_supported: ClassVar[bool] = True  # decode only
+    attn_cudagraph_support: ClassVar[AttentionCGSupport] = \
+        AttentionCGSupport.PURE_DECODE_ONLY
 
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                  vllm_config: VllmConfig, device: torch.device):
diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py
index 195fbd3b1b9c4..942cb95eefa2f 100644
--- a/vllm/v1/attention/backends/triton_attn.py
+++ b/vllm/v1/attention/backends/triton_attn.py
@@ -18,7 +18,8 @@ from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
-from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
+from vllm.v1.attention.backends.utils import (AttentionCGSupport,
+                                              AttentionMetadataBuilder,
                                               CommonAttentionMetadata)
 from vllm.v1.kv_cache_interface import AttentionSpec
 
@@ -57,7 +58,8 @@ class TritonAttentionMetadata:
 
 class TritonAttentionMetadataBuilder(
         AttentionMetadataBuilder[TritonAttentionMetadata]):
-    full_cudagraph_supported: ClassVar[bool] = True
+    attn_cudagraph_support: ClassVar[AttentionCGSupport] = \
+        AttentionCGSupport.ALWAYS
 
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                  vllm_config: VllmConfig, device: torch.device):
diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index 36bacf0cb36f8..d39cc0a39f45c 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import abc
+import enum
 import functools
 from abc import abstractmethod
 from dataclasses import dataclass, make_dataclass
@@ -65,9 +66,24 @@ class CommonAttentionMetadata:
 M = TypeVar("M")
 
 
+class AttentionCGSupport(enum.Enum):
+    """ Constants for the cudagraph support of the attention backend
+    Here we do not consider the cascade attention, as currently
+    it is never cudagraph supported."""
+
+    NEVER = 0
+    """NO cudagraph support"""
+    PURE_DECODE_ONLY = 1
+    """Cudagraph supported for pure decode, need to run without
+    cudagraph for mixed prefill-decode batches"""
+    ALWAYS = 2
+    """Cudagraph always supported"""
+
+
 class AttentionMetadataBuilder(abc.ABC, Generic[M]):
     # Does this backend/builder support CUDA Graphs for attention.
-    full_cudagraph_supported: ClassVar[bool] = False
+    attn_cudagraph_support: ClassVar[AttentionCGSupport] = \
+        AttentionCGSupport.NEVER
 
     @abstractmethod
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 29cda4d837bf3..d5a5799efb47c 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -47,7 +47,7 @@ from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
                         is_pin_memory_available, round_up, supports_dynamo)
 from vllm.v1.attention.backends.mamba_selectors import get_mamba_attn_backend
 from vllm.v1.attention.backends.utils import (
-    AttentionMetadataBuilder, CommonAttentionMetadata,
+    AttentionCGSupport, AttentionMetadataBuilder, CommonAttentionMetadata,
     make_kv_sharing_fast_prefill_attention_metadata,
     make_local_attention_virtual_batches)
 from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
@@ -2619,12 +2619,22 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             self.device,
         )
 
-        if (self.full_cuda_graph
-                and not attn_metadata_builder_i.full_cudagraph_supported):
-            raise ValueError(
-                f"Full CUDAGraph not supported for "
-                f"{attn_backend_i.__name__}. Turn off CompilationConfig."
-                f"full_cuda_graph or use a different attention backend.")
+        if self.full_cuda_graph:
+            if attn_metadata_builder_i.attn_cudagraph_support == \
+                AttentionCGSupport.NEVER:
+                raise ValueError(f"Full CUDAGraph not supported for "
+                                 f"{attn_backend_i.__name__}. Turn off "
+                                 f"CompilationConfig.full_cuda_graph or use a "
+                                 f" different attention backend.")
+            if attn_metadata_builder_i.attn_cudagraph_support == \
+                AttentionCGSupport.PURE_DECODE_ONLY:
+                # Limit the max cudagraph size to the max number of
+                # sequences for pure decode only cudagraph backend,
+                # whose max_query_len is 1.
+                self.cudagraph_batch_sizes = [
+                    size for size in self.cudagraph_batch_sizes
+                    if size <= self.scheduler_config.max_num_seqs
+                ]
         return attn_backend_i, attn_metadata_builder_i
 
     def initialize_attn_backend(self, kv_cache_config: KVCacheConfig) -> None:
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index 0f46ed223ab88..4bc4ece9a0df4 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -321,11 +321,16 @@ class Worker(WorkerBase):
         if get_pp_group().is_last_rank:
             max_num_reqs = min(self.scheduler_config.max_num_seqs,
                                self.scheduler_config.max_num_batched_tokens)
+            # Build attn_metadata for this dummy run to avoid potential
+            # illegal memory accesses during full cudagraph replay.
+            attn_cudagraph = self.compilation_config.full_cuda_graph and\
+                not self.model_config.enforce_eager
 
             # We skip EPLB here since we don't want to record dummy metrics
             hidden_states, last_hidden_states = \
                 self.model_runner._dummy_run(
                     num_tokens=max_num_reqs,
+                    capture_attn_cudagraph=attn_cudagraph,
                     skip_eplb=True,
                 )
             if self.model_runner.is_pooling_model:

From ee2eb6ecd86be4b47e334f74feb7874b9a41ca25 Mon Sep 17 00:00:00 2001
From: vllmellm <vllm.ellm@embeddedllm.com>
Date: Sat, 2 Aug 2025 10:34:37 +0800
Subject: [PATCH 148/224] [Model] Qwen2.5 VL SiLU-and-Mul (#22066)

Signed-off-by: kf <kuanfu.liu@embeddedllm.com>
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
Co-authored-by: kf <kuanfu.liu@embeddedllm.com>
---
 vllm/model_executor/models/qwen2_5_vl.py | 44 +++++++++++-------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index c4c4650f569e1..04e64422d2e0b 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -43,9 +43,10 @@ from vllm.distributed import parallel_state
 from vllm.distributed import utils as dist_utils
 from vllm.logger import init_logger
 from vllm.model_executor import SamplingMetadata
-from vllm.model_executor.layers.activation import _ACTIVATION_REGISTRY
+from vllm.model_executor.layers.activation import get_act_and_mul_fn
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (ColumnParallelLinear,
+                                               MergedColumnParallelLinear,
                                                QKVParallelLinear,
                                                RowParallelLinear)
 from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -171,16 +172,12 @@ class Qwen2_5_VisionMLP(nn.Module):
                  quant_config: Optional[QuantizationConfig] = None,
                  prefix: str = ""):
         super().__init__()
-        self.gate_proj = ColumnParallelLinear(in_features,
-                                              hidden_features,
-                                              bias=bias,
-                                              quant_config=quant_config,
-                                              prefix=f"{prefix}.gate_proj")
-        self.up_proj = ColumnParallelLinear(in_features,
-                                            hidden_features,
-                                            bias=bias,
-                                            quant_config=quant_config,
-                                            prefix=f"{prefix}.up_proj")
+        self.gate_up_proj = MergedColumnParallelLinear(
+            input_size=in_features,
+            output_sizes=[hidden_features] * 2,  # [gate_proj, up_proj]
+            bias=bias,
+            quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj")
         self.down_proj = RowParallelLinear(hidden_features,
                                            in_features,
                                            bias=bias,
@@ -189,10 +186,9 @@ class Qwen2_5_VisionMLP(nn.Module):
         self.act_fn = act_fn
 
     def forward(self, x: torch.Tensor):
-        x_gate, _ = self.gate_proj(x)
-        x_gate = self.act_fn(x_gate)
-        x_up, _ = self.up_proj(x)
-        x_down, _ = self.down_proj(x_gate * x_up)
+        gate_up, _ = self.gate_up_proj(x)
+        x = self.act_fn(gate_up)
+        x_down, _ = self.down_proj(x)
         return x_down
 
 
@@ -540,14 +536,14 @@ class Qwen2_5_VisionTransformer(nn.Module):
         self.rotary_pos_emb = Qwen2_5_VisionRotaryEmbedding(head_dim // 2)
 
         self.blocks = nn.ModuleList([
-            Qwen2_5_VisionBlock(
-                dim=self.hidden_size,
-                num_heads=self.num_heads,
-                mlp_hidden_dim=vision_config.intermediate_size,
-                act_fn=_ACTIVATION_REGISTRY[vision_config.hidden_act],
-                norm_layer=norm_layer,
-                quant_config=quant_config,
-                prefix=f"{prefix}.blocks.{layer_idx}")
+            Qwen2_5_VisionBlock(dim=self.hidden_size,
+                                num_heads=self.num_heads,
+                                mlp_hidden_dim=vision_config.intermediate_size,
+                                act_fn=get_act_and_mul_fn(
+                                    vision_config.hidden_act),
+                                norm_layer=norm_layer,
+                                quant_config=quant_config,
+                                prefix=f"{prefix}.blocks.{layer_idx}")
             for layer_idx in range(depth)
         ])
         self.merger = Qwen2_5_VisionPatchMerger(
@@ -752,6 +748,8 @@ class Qwen2_5_VisionTransformer(nn.Module):
             ("attn.qkv.", "attn.q.", "q"),
             ("attn.qkv.", "attn.k.", "k"),
             ("attn.qkv.", "attn.v.", "v"),
+            ("mlp.gate_up_proj.", "mlp.gate_proj.", 0),
+            ("mlp.gate_up_proj.", "mlp.up_proj.", 1),
         ]
         params_dict = dict(self.named_parameters(remove_duplicate=False))
         loaded_params: set[str] = set()

From 57393715e804387588241fbdb4ec94a7570230b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= <nlucches@redhat.com>
Date: Sat, 2 Aug 2025 04:41:40 +0200
Subject: [PATCH 149/224] [Misc] `VLLM_TARGET_DEVICE.lower()` (#22101)

Signed-off-by: NickLucche <nlucches@redhat.com>
---
 vllm/envs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index 2fda2903179b5..c161fa0dff6ba 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -213,7 +213,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # Target device of vLLM, supporting [cuda (by default),
     # rocm, neuron, cpu]
     "VLLM_TARGET_DEVICE":
-    lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda"),
+    lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda").lower(),
 
     # Maximum number of compilation jobs to run in parallel.
     # By default this is the number of CPUs

From a65f46be5ea9a92dde48df2b951c1915aa1d9595 Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath <varunsundar08@gmail.com>
Date: Sat, 2 Aug 2025 08:12:03 +0530
Subject: [PATCH 150/224] [Misc] DeepGemmExperts : Avoid JIT generation in the
 hot-path (#21955)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
---
 vllm/envs.py                                  |  9 +++
 .../layers/fused_moe/deep_gemm_moe.py         | 77 ++++++++++++++++++-
 vllm/utils/deep_gemm.py                       |  7 ++
 3 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index c161fa0dff6ba..2d470c6dccbfd 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -126,6 +126,7 @@ if TYPE_CHECKING:
     VLLM_TPU_MOST_MODEL_LEN: Optional[int] = None
     VLLM_TPU_USING_PATHWAYS: bool = False
     VLLM_USE_DEEP_GEMM: bool = False
+    VLLM_SKIP_DEEP_GEMM_WARMUP: bool = False
     VLLM_USE_FLASHINFER_MOE_FP8: bool = False
     VLLM_USE_FLASHINFER_MOE_FP4: bool = False
     VLLM_XGRAMMAR_CACHE_MB: int = 0
@@ -910,6 +911,14 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_USE_DEEP_GEMM":
     lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "0"))),
 
+    # DeepGemm JITs the kernels on-demand. The warmup attempts to make DeepGemm
+    # JIT all the required kernels before model execution so there is no
+    # JIT'ing in the hot-path. However, this warmup increases the engine
+    # startup time by a couple of minutes.
+    # Set `VLLM_SKIP_DEEP_GEMM_WARMUP=1` to disable the warmup.
+    "VLLM_SKIP_DEEP_GEMM_WARMUP":
+    lambda: bool(int(os.getenv("VLLM_SKIP_DEEP_GEMM_WARMUP", "0"))),
+
     # Allow use of FlashInfer MoE kernels for fused moe ops.
     "VLLM_USE_FLASHINFER_MOE_FP8":
     lambda: bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP8", "0"))),
diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
index b89e5ac6f093e..bd3605378b6dc 100644
--- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
@@ -4,7 +4,9 @@ import functools
 from typing import Any, Optional
 
 import torch
+from tqdm import tqdm
 
+import vllm.envs as env
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
@@ -17,7 +19,7 @@ from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
 from vllm.model_executor.layers.fused_moe.utils import _resize_cache
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     per_token_group_quant_fp8)
-from vllm.utils import has_deep_gemm
+from vllm.utils import has_deep_gemm, run_once
 from vllm.utils.deep_gemm import m_grouped_fp8_gemm_nt_contiguous
 
 logger = init_logger(__name__)
@@ -82,6 +84,65 @@ def _valid_deep_gemm(hidden_states: torch.Tensor, w1: torch.Tensor,
     return True
 
 
+@run_once
+def warmup_deepgemm_gg_contiguous_kernels(w1: torch.Tensor, w2: torch.Tensor,
+                                          w1_scale: torch.Tensor,
+                                          w2_scale: torch.Tensor,
+                                          num_topk: int):
+    """
+    DeepGemm JITs the grouped-gemm kernels. The JIT'ing happens based on the
+    input tensor shapes. In this function, we construct all possible input
+    tensor shapes so all the kernels are JIT'ed and cached.
+    Note that this warmup is expected to happen during the model profile
+    call and not during actual model inference.
+    """
+
+    assert w1.size(0) == w2.size(0), (
+        "w1 and w2 must have the same number of experts")
+
+    block_m = deep_gemm_block_shape()[0]
+    num_experts = w1.size(0)
+    device = w1.device
+
+    # This is the maximum GroupedGemm M size that we expect to run
+    # the grouped_gemm with.
+    MAX_M = compute_aligned_M(env.VLLM_FUSED_MOE_CHUNK_SIZE,
+                              num_topk,
+                              num_experts,
+                              block_m,
+                              expert_tokens_meta=None)
+    # Assign a random expert-id to each block of block_m rows.
+    MAX_BLOCKS = MAX_M // block_m
+    expert_ids_block = torch.randint(low=0,
+                                     high=num_experts,
+                                     size=(MAX_BLOCKS, ),
+                                     device=device,
+                                     dtype=torch.int32)
+    expert_ids = torch.repeat_interleave(expert_ids_block, block_m, dim=0)
+
+    def _warmup(w: torch.Tensor, w_scale: torch.Tensor):
+
+        _, n, k = w.size()
+        a1q = torch.empty((MAX_M, k), device=device).to(torch.float8_e4m3fn)
+        a1q_scales = torch.empty((MAX_M, k // block_m),
+                                 device=device,
+                                 dtype=torch.float32)
+        out = torch.empty((MAX_M, n), device=device, dtype=torch.bfloat16)
+
+        pbar = tqdm(total=MAX_BLOCKS,
+                    desc=f"DeepGemmExperts GEMM warmup (MAX_M={MAX_M})")
+        num_tokens = MAX_M
+        while num_tokens > 0:
+            m_grouped_fp8_gemm_nt_contiguous(
+                (a1q[:num_tokens], a1q_scales[:num_tokens]), (w, w_scale),
+                out[:num_tokens], expert_ids[:num_tokens])
+            pbar.update(1)
+            num_tokens = num_tokens - block_m
+
+    _warmup(w1, w1_scale)
+    _warmup(w2, w2_scale)
+
+
 class DeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
 
     def __init__(self):
@@ -156,6 +217,20 @@ class DeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
     ):
         assert self.block_shape is not None
         assert a1q_scale is not None
+        assert w1_scale is not None
+        assert w2_scale is not None
+
+        if not env.VLLM_SKIP_DEEP_GEMM_WARMUP:
+            # DeepGemm JITs the grouped-gemm kernels. We don't want the JIT'ing
+            # to happen during actual model-inference. The
+            # `warmup_deepgemm_gg_contiguous_kernels` function is `run_once`
+            # decorated and executes during the model profile run. This warmup
+            # should create all the required JITs for the current model.
+            warmup_deepgemm_gg_contiguous_kernels(w1,
+                                                  w2,
+                                                  w1_scale,
+                                                  w2_scale,
+                                                  num_topk=topk_ids.size(1))
 
         a1q = hidden_states
         _, N, K = w1.size()
diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py
index 4dedee2a3f862..8ab34e7505ee2 100644
--- a/vllm/utils/deep_gemm.py
+++ b/vllm/utils/deep_gemm.py
@@ -8,6 +8,7 @@ from __future__ import annotations
 
 import functools
 import importlib
+import os
 from typing import Any, Callable, NoReturn
 
 import torch
@@ -77,6 +78,12 @@ def _lazy_init() -> None:
     if not has_deep_gemm():
         return
 
+    # Set up deep_gemm cache path
+    DEEP_GEMM_JIT_CACHE_ENV_NAME = 'DG_JIT_CACHE_DIR'
+    if not os.environ.get(DEEP_GEMM_JIT_CACHE_ENV_NAME, None):
+        os.environ[DEEP_GEMM_JIT_CACHE_ENV_NAME] = os.path.join(
+            envs.VLLM_CACHE_ROOT, "deep_gemm")
+
     _dg = importlib.import_module("deep_gemm")
 
     _fp8_gemm_nt_impl = _resolve_symbol(_dg, "fp8_gemm_nt",

From 9f9c38c392476fd35b9154221c00a2255dcfd010 Mon Sep 17 00:00:00 2001
From: Dipika Sikka <dipikasikka1@gmail.com>
Date: Fri, 1 Aug 2025 22:43:37 -0400
Subject: [PATCH 151/224] [Speculators][Speculative Decoding] Add Qwen Eagle3
 Support (#21835)

Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com>
---
 .../speculators/test_eagle3.py                | 14 +++++++++++--
 vllm/config.py                                | 15 ++++++++++---
 vllm/model_executor/models/qwen2.py           | 21 +++++++++++++------
 vllm/model_executor/models/qwen3.py           |  7 +++++++
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/tests/speculative_decoding/speculators/test_eagle3.py b/tests/speculative_decoding/speculators/test_eagle3.py
index c58fc8c0dc5f4..c46ac7a88b751 100644
--- a/tests/speculative_decoding/speculators/test_eagle3.py
+++ b/tests/speculative_decoding/speculators/test_eagle3.py
@@ -6,11 +6,21 @@ import torch
 
 @pytest.mark.parametrize(
     "model_path",
-    [("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717"),
-     ("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized")])
+    [("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized")])
 def test_llama(vllm_runner, example_prompts, model_path):
     with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
         vllm_outputs = vllm_model.generate_greedy(example_prompts,
                                                   max_tokens=20)
         print(vllm_outputs)
         assert vllm_outputs
+
+
+@pytest.mark.parametrize(
+    "model_path",
+    [("nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized")])
+def test_qwen(vllm_runner, example_prompts, model_path):
+    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
+        vllm_outputs = vllm_model.generate_greedy(example_prompts,
+                                                  max_tokens=20)
+        print(vllm_outputs)
+        assert vllm_outputs
diff --git a/vllm/config.py b/vllm/config.py
index dabb4b524dfd8..95dae4275edf3 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -3175,10 +3175,19 @@ class SpeculativeConfig:
                              "speculative decoding is > 1, but got "
                              f"{self.disable_by_batch_size=}")
 
-        if self.method == "eagle3" and self.target_model_config and \
-            "llama" not in self.target_model_config.hf_text_config.model_type:
+        from vllm.transformers_utils.configs import SpeculatorsConfig
+
+        eagle3_target_supported = ["llama"]
+        if self.draft_model_config and isinstance(
+                self.draft_model_config.hf_config, SpeculatorsConfig):
+            eagle3_target_supported.append("qwen")
+
+        if self.method == "eagle3" and self.target_model_config and not any(
+                supported_model in
+                self.target_model_config.hf_text_config.model_type
+                for supported_model in eagle3_target_supported):
             raise ValueError(
-                "Eagle3 is only supported for Llama models. "
+                f"Eagle3 is only supported for {eagle3_target_supported} models. "  # noqa: E501
                 f"Got {self.target_model_config.hf_text_config.model_type=}")
 
         return self
diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
index 23f65b99c22ce..0e7507a4570be 100644
--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -330,6 +330,8 @@ class Qwen2Model(nn.Module):
         else:
             self.norm = PPMissingLayer()
 
+        self.aux_hidden_state_layers: tuple[int] = tuple()
+
     def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
         return self.embed_tokens(input_ids)
 
@@ -350,18 +352,25 @@ class Qwen2Model(nn.Module):
             assert intermediate_tensors is not None
             hidden_states = intermediate_tensors["hidden_states"]
             residual = intermediate_tensors["residual"]
-        for layer in self.layers[self.start_layer:self.end_layer]:
-            hidden_states, residual = layer(
-                positions,
-                hidden_states,
-                residual,
-            )
+
+        aux_hidden_states = []
+        for idx, layer in enumerate(
+                self.layers[self.start_layer:self.end_layer]):
+            if idx in self.aux_hidden_state_layers:
+                aux_hidden_states.append(hidden_states + residual)
+            hidden_states, residual = layer(positions, hidden_states, residual)
+
         if not get_pp_group().is_last_rank:
             return IntermediateTensors({
                 "hidden_states": hidden_states,
                 "residual": residual
             })
+
         hidden_states, _ = self.norm(hidden_states, residual)
+
+        if len(aux_hidden_states) > 0:
+            return hidden_states, aux_hidden_states
+
         return hidden_states
 
     def load_weights(self, weights: Iterable[tuple[str,
diff --git a/vllm/model_executor/models/qwen3.py b/vllm/model_executor/models/qwen3.py
index 393ce41a91a00..d2ae8959b103d 100644
--- a/vllm/model_executor/models/qwen3.py
+++ b/vllm/model_executor/models/qwen3.py
@@ -288,6 +288,13 @@ class Qwen3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
         self.make_empty_intermediate_tensors = (
             self.model.make_empty_intermediate_tensors)
 
+    def set_aux_hidden_state_layers(self, layers: tuple[int]) -> None:
+        self.model.aux_hidden_state_layers = layers
+
+    def get_eagle3_aux_hidden_state_layers(self) -> tuple[int]:
+        num_layers = len(self.model.layers)
+        return (2, num_layers // 2, num_layers - 3)
+
     def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
         return self.model.get_input_embeddings(input_ids)
 

From 8d524ce79ffd0571d6a576cb9a5c21feab187246 Mon Sep 17 00:00:00 2001
From: Nick Hill <nhill@redhat.com>
Date: Sat, 2 Aug 2025 03:45:27 +0100
Subject: [PATCH 152/224] [BugFix] Improve internal DP load balancing (#21617)

Signed-off-by: Nick Hill <nhill@redhat.com>
---
 vllm/entrypoints/openai/api_server.py |   3 +
 vllm/v1/engine/async_llm.py           |   4 +
 vllm/v1/engine/coordinator.py         | 110 +++++++++++++++++---------
 vllm/v1/engine/core.py                |  13 +--
 vllm/v1/engine/core_client.py         |  46 +++++++----
 vllm/v1/metrics/stats.py              |   4 +
 vllm/v1/utils.py                      |   1 +
 7 files changed, 122 insertions(+), 59 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index b8ec5461f7719..9bf4702320788 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -199,6 +199,8 @@ async def build_async_engine_client_from_engine_args(
 
         from vllm.v1.engine.async_llm import AsyncLLM
         async_llm: Optional[AsyncLLM] = None
+        client_count = client_config.pop(
+            "client_count") if client_config else 1
         client_index = client_config.pop(
             "client_index") if client_config else 0
         try:
@@ -208,6 +210,7 @@ async def build_async_engine_client_from_engine_args(
                 enable_log_requests=engine_args.enable_log_requests,
                 disable_log_stats=engine_args.disable_log_stats,
                 client_addresses=client_config,
+                client_count=client_count,
                 client_index=client_index)
 
             # Don't keep the dummy data in memory
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 308ca32105ba9..45f450291ab63 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -57,6 +57,7 @@ class AsyncLLM(EngineClient):
         start_engine_loop: bool = True,
         stat_loggers: Optional[list[StatLoggerFactory]] = None,
         client_addresses: Optional[dict[str, str]] = None,
+        client_count: int = 1,
         client_index: int = 0,
     ) -> None:
         """
@@ -120,6 +121,7 @@ class AsyncLLM(EngineClient):
             executor_class=executor_class,
             log_stats=self.log_stats,
             client_addresses=client_addresses,
+            client_count=client_count,
             client_index=client_index,
         )
 
@@ -156,6 +158,7 @@ class AsyncLLM(EngineClient):
             enable_log_requests: bool = False,
             disable_log_stats: bool = False,
             client_addresses: Optional[dict[str, str]] = None,
+            client_count: int = 1,
             client_index: int = 0,
             disable_log_requests: bool = True,  # Deprecated, will be removed
     ) -> "AsyncLLM":
@@ -176,6 +179,7 @@ class AsyncLLM(EngineClient):
             log_stats=not disable_log_stats,
             usage_context=usage_context,
             client_addresses=client_addresses,
+            client_count=client_count,
             client_index=client_index,
         )
 
diff --git a/vllm/v1/engine/coordinator.py b/vllm/v1/engine/coordinator.py
index 8d8d1689e61e3..596edfdbe24f8 100644
--- a/vllm/v1/engine/coordinator.py
+++ b/vllm/v1/engine/coordinator.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import copy
 import multiprocessing
 import time
 import weakref
@@ -65,18 +66,14 @@ class DPCoordinator:
 
         # Assume coordinator is colocated with front-end procs when not in
         # either external or hybrid DP LB mode.
+        local_only = not (external_lb or hybrid_lb)
         front_publish_address = get_engine_client_zmq_addr(
-            local_only=not external_lb and not hybrid_lb, host=host)
+            local_only=local_only, host=host)
 
         local_only_eng = dp_size == parallel_config.data_parallel_size_local
         back_publish_address = get_engine_client_zmq_addr(local_only_eng, host)
         back_output_address = get_engine_client_zmq_addr(local_only_eng, host)
 
-        # When in external LB mode, load stats aren't published, only changes
-        # to request wave / running state, so we don't need to rate-limit the
-        # updates to the front-end proc(s).
-        min_stats_update_interval_ms = 0 if external_lb else 100
-
         context = get_mp_context()
         self.proc: multiprocessing.Process = context.Process(
             target=DPCoordinatorProc.run_coordinator,
@@ -86,7 +83,6 @@ class DPCoordinator:
                 "front_publish_address": front_publish_address,
                 "back_output_address": back_output_address,
                 "back_publish_address": back_publish_address,
-                "min_stats_update_interval_ms": min_stats_update_interval_ms,
             },
             daemon=True)
         self.proc.start()
@@ -125,10 +121,6 @@ class DPCoordinatorProc:
 
         self.stats_update_interval_ms = min_stats_update_interval_ms
 
-        self.current_wave = 0
-        self.engines_running = False
-        self.stats_changed = False
-
     @staticmethod
     def run_coordinator(
         engine_count: int,
@@ -155,6 +147,16 @@ class DPCoordinatorProc:
 
         decoder = MsgpackDecoder(EngineCoreOutputs)
 
+        # For tracking request wave progression.
+        current_wave = 0
+        engines_running = False
+
+        # For tracking request counts for internal load-balancing.
+        stats_changed = False
+        last_stats_step = -1
+        last_stats_wave = -1
+        last_step_counts: Optional[list[list[int]]] = None
+
         with make_zmq_socket(
                 path=front_publish_address,  # IPC
                 ctx=self.ctx,
@@ -191,21 +193,33 @@ class DPCoordinatorProc:
             while True:
                 elapsed = int(time.time() * 1000) - last_publish_time
                 # Send at stats_update_interval_ms interval if the stats have
-                # changed, or otherwise every 4 seconds.
+                # changed, or otherwise every 5 seconds.
                 wait_for = (self.stats_update_interval_ms
-                            if self.stats_changed else 4000)
-                events = poller.poll(timeout=max(0, wait_for - elapsed))
+                            if stats_changed else 5000)
+
+                # Wait at least 50ms to ensure we've received all stats for
+                # the current step.
+                min_timeout = 50 if last_step_counts is None else 0
+
+                events = poller.poll(timeout=max(min_timeout, wait_for -
+                                                 elapsed))
                 if not events:
                     # Poller timeout - publish current stats to front-ends.
-                    engine_req_counts_list = self._get_engine_counts()
-                    to_publish = (engine_req_counts_list, self.current_wave,
-                                  self.engines_running)
+                    if last_step_counts is not None:
+                        engine_req_counts_list = last_step_counts
+                        last_step_counts = None
+                    else:
+                        engine_req_counts_list = self._get_engine_counts()
+                        stats_changed = False
+
+                    to_publish = (engine_req_counts_list, current_wave,
+                                  engines_running)
                     publish_front.send(msgspec.msgpack.encode(to_publish))
                     last_publish_time = int(time.time() * 1000)
-                    self.stats_changed = False
                     continue
 
                 events = dict(events)
+                wave_state_changed = False
 
                 if publish_front in events:
                     buffer = publish_front.recv()
@@ -232,7 +246,7 @@ class DPCoordinatorProc:
                             # current_wave
                             # we note that 0 is the wave number for the new
                             # engine
-                            self.engines_running = False
+                            engines_running = False
                             logger.info(
                                 "DPCoordinator scaled up from %s to %s "
                                 "engines", current_count, new_engine_count)
@@ -248,15 +262,15 @@ class DPCoordinatorProc:
                     # engines are paused, so that we can wake the other
                     # engines.
                     engine_to_exclude, wave = decoded
-                    if not self.engines_running:
-                        if wave < self.current_wave:
+                    if not engines_running:
+                        if wave < current_wave:
                             # If the wave number is stale, ensure the message
                             # is handled by all the engines.
                             engine_to_exclude = None
 
-                        self.engines_running = True
-                        self.stats_changed = True
-                        self._send_start_wave(publish_back, self.current_wave,
+                        engines_running = True
+                        wave_state_changed = True
+                        self._send_start_wave(publish_back, current_wave,
                                               engine_to_exclude)
 
                 if output_back in events:
@@ -274,36 +288,56 @@ class DPCoordinatorProc:
                         # 1. Updated request load stats - update our local
                         # state with these.
                         stats = self.engines[eng_index].request_counts
+                        stats_step = scheduler_stats.step_counter
+                        stats_wave = scheduler_stats.current_wave
+                        if (stats_wave > last_stats_wave
+                                or stats_wave == last_stats_wave
+                                and stats_step > last_stats_step):
+                            if stats_changed:
+                                last_step_counts = self._get_engine_counts(
+                                    do_copy=True)
+                            last_stats_step = stats_step
+                            last_stats_wave = stats_wave
+                        elif stats_wave != last_stats_wave or (
+                                stats_step != last_stats_step):
+                            logger.warning(
+                                "Received stats for out-of-order "
+                                "step (%d, %d) from engine %d (expected "
+                                "> (%d, %d))", stats_wave, stats_step,
+                                eng_index, last_stats_wave, last_stats_step)
                         stats[0] = scheduler_stats.num_waiting_reqs
                         stats[1] = scheduler_stats.num_running_reqs
-                        self.stats_changed = True
+                        stats_changed = True
 
                     if (wave := outputs.wave_complete) is not None:
                         # 2. Notification from rank 0 engine that we've
                         # moved into the global paused state
                         # (engines_running==False).
-                        if self.current_wave <= wave:
+                        if current_wave <= wave:
                             new_wave = wave + 1
                             logger.debug("Moving DP wave from %d to %d.",
-                                         self.current_wave, new_wave)
-                            self.current_wave = new_wave
-                            self.engines_running = False
-                            self.stats_changed = True
+                                         current_wave, new_wave)
+                            current_wave = new_wave
+                            engines_running = False
+                            wave_state_changed = True
                     elif (wave := outputs.start_wave) is not None and (
-                            wave > self.current_wave or
-                        (wave == self.current_wave
-                         and not self.engines_running)):
+                            wave > current_wave or
+                        (wave == current_wave and not engines_running)):
                         # 3. The engine received request for a non-current wave
                         # so we must ensure that other engines progress to the
                         # next wave (race condition handling).
                         logger.debug(
                             "Starting wave %d after notification of "
                             "stale wave request from engine.", wave)
-                        self.current_wave = wave
-                        self.engines_running = True
-                        self.stats_changed = True
+                        current_wave = wave
+                        engines_running = True
+                        wave_state_changed = True
                         self._send_start_wave(publish_back, wave, eng_index)
 
+                if wave_state_changed:
+                    message = (None, current_wave, engines_running)
+                    publish_front.send(msgspec.msgpack.encode(message))
+
     @staticmethod
     def _send_start_wave(socket: zmq.Socket, wave: int,
                          exclude_engine_index: Optional[int]):
@@ -316,6 +350,8 @@ class DPCoordinatorProc:
         socket.send_multipart(
             (EngineCoreRequestType.START_DP_WAVE.value, wave_encoded))
 
-    def _get_engine_counts(self) -> list[list[int]]:
+    def _get_engine_counts(self, do_copy=False) -> list[list[int]]:
         """Return list of [waiting, running] count lists for each engine."""
+        if do_copy:
+            return [copy.copy(e.request_counts) for e in self.engines]
         return [e.request_counts for e in self.engines]
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 0a889b2a0a184..79c47e1028882 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -928,7 +928,7 @@ class DPEngineCoreProc(EngineCoreProc):
     ):
         # Counts forward-passes of the model so that we can synchronize
         # finished with DP peers every N steps.
-        self.counter = 0
+        self.step_counter = 0
         self.current_wave = 0
         self.last_counts = (0, 0)
 
@@ -999,7 +999,9 @@ class DPEngineCoreProc(EngineCoreProc):
         counts = self.scheduler.get_request_counts()
         if counts != self.last_counts:
             self.last_counts = counts
-            stats = SchedulerStats(*counts)
+            stats = SchedulerStats(*counts,
+                                   step_counter=self.step_counter,
+                                   current_wave=self.current_wave)
             self.output_queue.put_nowait(
                 (-1, EngineCoreOutputs(scheduler_stats=stats)))
 
@@ -1041,15 +1043,16 @@ class DPEngineCoreProc(EngineCoreProc):
                     self.output_queue.put_nowait(
                         (client_index,
                          EngineCoreOutputs(wave_complete=self.current_wave)))
+                # Increment wave count and reset step counter.
                 self.current_wave += 1
+                self.step_counter = 0
 
     def _has_global_unfinished_reqs(self, local_unfinished: bool) -> bool:
 
         # Optimization - only perform finish-sync all-reduce every 32 steps.
-        self.counter += 1
-        if self.counter != 32:
+        self.step_counter += 1
+        if self.step_counter % 32 != 0:
             return True
-        self.counter = 0
 
         return ParallelConfig.has_unfinished_dp(self.dp_group,
                                                 local_unfinished)
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index 26985df6f62df..4d30bb6b74466 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -86,11 +86,12 @@ class EngineCoreClient(ABC):
         executor_class: type[Executor],
         log_stats: bool,
         client_addresses: Optional[dict[str, str]] = None,
+        client_count: int = 1,
         client_index: int = 0,
     ) -> "MPClient":
         parallel_config = vllm_config.parallel_config
         client_args = (vllm_config, executor_class, log_stats,
-                       client_addresses, client_index)
+                       client_addresses, client_count, client_index)
         if parallel_config.data_parallel_size > 1:
             if parallel_config.data_parallel_external_lb:
                 # External load balancer - client per DP rank.
@@ -727,6 +728,7 @@ class AsyncMPClient(MPClient):
                  executor_class: type[Executor],
                  log_stats: bool,
                  client_addresses: Optional[dict[str, str]] = None,
+                 client_count: int = 1,
                  client_index: int = 0):
         super().__init__(
             asyncio_mode=True,
@@ -929,11 +931,12 @@ class DPAsyncMPClient(AsyncMPClient):
                  executor_class: type[Executor],
                  log_stats: bool,
                  client_addresses: Optional[dict[str, str]] = None,
+                 client_count: int = 1,
                  client_index: int = 0):
         self.current_wave = 0
 
         super().__init__(vllm_config, executor_class, log_stats,
-                         client_addresses, client_index)
+                         client_addresses, client_count, client_index)
 
         # List of [waiting, running] pair per engine.
         # Used only by DPLBAsyncMPClient subclass.
@@ -1029,7 +1032,11 @@ class DPAsyncMPClient(AsyncMPClient):
                     counts, wave, running = msgspec.msgpack.decode(buf)
                     self.current_wave = wave
                     self.engines_running = running
-                    self.lb_engines = counts[count_slice]
+                    if counts is not None:
+                        sliced_counts = counts[count_slice]
+                        self.lb_engines = sliced_counts
+                        logger.debug("Received counts: %s (%s)", sliced_counts,
+                                     count_slice)
 
         resources.stats_update_task = asyncio.create_task(
             run_engine_stats_update_task())
@@ -1065,40 +1072,45 @@ class DPLBAsyncMPClient(DPAsyncMPClient):
                  executor_class: type[Executor],
                  log_stats: bool,
                  client_addresses: Optional[dict[str, str]] = None,
+                 client_count: int = 1,
                  client_index: int = 0):
 
+        self.client_count = client_count
+
         # To route aborts to the correct engine.
         self.reqs_in_flight: dict[str, EngineIdentity] = {}
 
         super().__init__(vllm_config, executor_class, log_stats,
-                         client_addresses, client_index)
+                         client_addresses, client_count, client_index)
 
         assert len(self.core_engines) > 1
 
+        self.eng_start_index = (len(self.core_engines) *
+                                self.client_index) // client_count
+
     def get_core_engine_for_request(
             self, request: EngineCoreRequest) -> EngineIdentity:
         # Engines are in rank order.
+        current_counts = self.lb_engines
         if (eng_index := request.data_parallel_rank) is None:
-            if not self.lb_engines:
+            if not current_counts:
                 return self.core_engine
             # TODO use P2C alg for larger DP sizes
-            num_engines = len(self.lb_engines)
-            min_counts = [sys.maxsize, sys.maxsize]
+            num_engines = len(current_counts)
+            min_score = sys.maxsize
             eng_index = 0
             for i in range(num_engines):
                 # Start from client_index to help with balancing when engines
                 # are empty.
-                idx = (self.client_index + i) % num_engines
-                counts = self.lb_engines[idx]
-                if counts < min_counts:
-                    min_counts = counts
+                idx = (self.eng_start_index + i) % num_engines
+                waiting, running = current_counts[idx]
+                score = waiting * 4 + running
+                if score < min_score:
+                    min_score = score
                     eng_index = idx
-            # Adjust local counts for better balancing between stats updates
-            # from the coordinator (which happen every 100ms).
-            if min_counts[0]:
-                min_counts[0] += 1
-            else:
-                min_counts[1] += 1
+            # Increment local waiting count for better balancing between stats
+            # updates from the coordinator (which happen every 100ms).
+            current_counts[eng_index][0] += self.client_count
 
         chosen_engine = self.core_engines[eng_index]
         # Record which engine is chosen for this request, to handle aborts.
diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py
index 1eb10ccb6c493..9a80460261e02 100644
--- a/vllm/v1/metrics/stats.py
+++ b/vllm/v1/metrics/stats.py
@@ -33,6 +33,10 @@ class SchedulerStats:
     num_running_reqs: int = 0
     num_waiting_reqs: int = 0
 
+    # These are used for internal DP load-balancing.
+    step_counter: int = 0
+    current_wave: int = 0
+
     kv_cache_usage: float = 0.0
 
     prefix_cache_stats: PrefixCacheStats = field(
diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py
index c74d8c543f76c..d0175695c1d0f 100644
--- a/vllm/v1/utils.py
+++ b/vllm/v1/utils.py
@@ -154,6 +154,7 @@ class APIServerProcessManager:
             client_config = {
                 "input_address": in_addr,
                 "output_address": out_addr,
+                "client_count": num_servers,
                 "client_index": i
             }
             if stats_update_address is not None:

From 6e8d8c4afbddf725b34ef938616701869f5b3462 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Fri, 1 Aug 2025 22:45:46 -0400
Subject: [PATCH 153/224] [Test] Add Unit Test for Batched DeepGEMM (#21559)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 tests/kernels/moe/test_batched_deepgemm.py | 103 +++++++++++++++++++++
 tests/kernels/moe/test_deepgemm.py         |   8 +-
 vllm/utils/deep_gemm.py                    |   4 +-
 3 files changed, 107 insertions(+), 8 deletions(-)
 create mode 100644 tests/kernels/moe/test_batched_deepgemm.py

diff --git a/tests/kernels/moe/test_batched_deepgemm.py b/tests/kernels/moe/test_batched_deepgemm.py
new file mode 100644
index 0000000000000..018d4c224f75e
--- /dev/null
+++ b/tests/kernels/moe/test_batched_deepgemm.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+    BatchedDeepGemmExperts)
+from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
+    BatchedPrepareAndFinalize, BatchedTritonExperts)
+from vllm.model_executor.layers.fused_moe.modular_kernel import (
+    FusedMoEModularKernel)
+from vllm.utils.deep_gemm import calc_diff, is_deep_gemm_supported
+
+from .test_deepgemm import make_block_quant_fp8_weights
+
+BLOCK_SIZE = [128, 128]
+
+
+@pytest.mark.skipif(not is_deep_gemm_supported(),
+                    reason="Requires deep_gemm kernels")
+@pytest.mark.parametrize("E", [16, 32])  # number of experts
+@pytest.mark.parametrize("T", [256, 512])  # tokens per expert
+@pytest.mark.parametrize("K", [128, 256])  # hidden dim
+@pytest.mark.parametrize("N", [512, 1024])  # intermediate dim per expert
+@pytest.mark.parametrize("topk", [2, 4])
+def test_batched_deepgemm_vs_triton(E: int, T: int, K: int, N: int, topk: int,
+                                    monkeypatch):
+    """Compare BatchedDeepGemmExperts to BatchedTritonExperts."""
+
+    monkeypatch.setenv("VLLM_USE_DEEP_GEMM", "1")
+
+    device = "cuda"
+    w1, w2, w1_s, w2_s = make_block_quant_fp8_weights(E, N, K, BLOCK_SIZE)
+
+    M = E * T  # total tokens
+    a = torch.randn(M, K, device=device, dtype=torch.bfloat16) / 10.0
+    fp8_info = torch.finfo(torch.float8_e4m3fn)
+    a.clamp_(fp8_info.min, fp8_info.max)
+
+    # random router outputs → top-k indices / weights
+    router_logits = torch.randn(M, E, device=device, dtype=torch.float32)
+    topk_weights, topk_ids = torch.topk(router_logits, k=topk, dim=-1)
+    topk_weights = torch.nn.functional.softmax(topk_weights, dim=-1)
+
+    # token number for each expert
+    cnt = torch.bincount(topk_ids.flatten(), minlength=E)
+    max_cnt = int(cnt.max().item())
+    # next power of 2 for max token number
+    max_num_tokens = 1 << (max_cnt - 1).bit_length()
+
+    prep_finalize = BatchedPrepareAndFinalize(
+        max_num_tokens=max_num_tokens,
+        num_local_experts=E,
+        num_dispatchers=1,
+        rank=0,
+    )
+
+    # triton (reference)
+    triton_experts = BatchedTritonExperts(
+        max_num_tokens=max_num_tokens,
+        num_dispatchers=1,
+        use_fp8_w8a8=True,
+        per_act_token_quant=False,
+        block_shape=BLOCK_SIZE,
+    )
+    mk_triton = FusedMoEModularKernel(prep_finalize, triton_experts)
+
+    out_triton = mk_triton(
+        hidden_states=a,
+        w1=w1,
+        w2=w2,
+        topk_weights=topk_weights,
+        topk_ids=topk_ids,
+        inplace=False,
+        w1_scale=w1_s,
+        w2_scale=w2_s,
+        global_num_experts=E,
+    )
+
+    # deepgemm
+    deepgemm_experts = BatchedDeepGemmExperts(
+        max_num_tokens=max_num_tokens,
+        num_dispatchers=1,
+        block_shape=BLOCK_SIZE,
+        per_act_token_quant=False,
+    )
+    mk_deepgemm = FusedMoEModularKernel(prep_finalize, deepgemm_experts)
+
+    out_deepgemm = mk_deepgemm(
+        hidden_states=a,
+        w1=w1,
+        w2=w2,
+        topk_weights=topk_weights,
+        topk_ids=topk_ids,
+        inplace=False,
+        w1_scale=w1_s,
+        w2_scale=w2_s,
+        global_num_experts=E,
+    )
+
+    diff = calc_diff(out_deepgemm, out_triton)
+    assert diff < 1e-3, f"Output diff too large: {diff}"
diff --git a/tests/kernels/moe/test_deepgemm.py b/tests/kernels/moe/test_deepgemm.py
index b6ea4ee2324c9..b2b78662c9ded 100644
--- a/tests/kernels/moe/test_deepgemm.py
+++ b/tests/kernels/moe/test_deepgemm.py
@@ -20,11 +20,6 @@ from vllm.utils.deep_gemm import (calc_diff, is_deep_gemm_supported,
 
 BLOCK_SIZE = [128, 128]
 
-requires_deep_gemm = pytest.mark.skipif(
-    not is_deep_gemm_supported(),
-    reason="Requires deep_gemm kernels",
-)
-
 
 def make_block_quant_fp8_weights(
     e: int,
@@ -152,7 +147,8 @@ NUM_EXPERTS = [32]
 @pytest.mark.parametrize("mnk", MNKs)
 @pytest.mark.parametrize("topk", TOPKS)
 @pytest.mark.parametrize("num_experts", NUM_EXPERTS)
-@requires_deep_gemm
+@pytest.mark.skipif(not is_deep_gemm_supported(),
+                    reason="Requires deep_gemm kernels")
 def test_deepgemm_vs_triton(mnk, topk, num_experts, monkeypatch):
 
     with monkeypatch.context() as m:
diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py
index 8ab34e7505ee2..0edfb01cde9d6 100644
--- a/vllm/utils/deep_gemm.py
+++ b/vllm/utils/deep_gemm.py
@@ -23,10 +23,10 @@ def is_deep_gemm_supported() -> bool:
     """Return ``True`` if DeepGEMM is supported on the current platform.
     Currently, only Hopper and Blackwell GPUs are supported.
     """
-    supported_arch = current_platform.is_cuda() and (
+    is_supported_arch = current_platform.is_cuda() and (
         current_platform.is_device_capability(90)
         or current_platform.is_device_capability(100))
-    return has_deep_gemm() and supported_arch
+    return has_deep_gemm() and is_supported_arch
 
 
 @functools.cache

From 0edaf752d7482a3c170c25376c466e730ab87ddd Mon Sep 17 00:00:00 2001
From: Sage Moore <sage@neuralmagic.com>
Date: Fri, 1 Aug 2025 19:47:53 -0700
Subject: [PATCH 154/224] [Attention][DBO] Add support for "splitting" the
 CommonAttentionMetadata (#21153)

Signed-off-by: Sage Moore <sage@neuralmagic.com>
---
 .../v1/attention/test_attention_splitting.py  | 157 ++++++++++++++++++
 vllm/v1/attention/backends/utils.py           |  83 +++++++++
 2 files changed, 240 insertions(+)
 create mode 100644 tests/v1/attention/test_attention_splitting.py

diff --git a/tests/v1/attention/test_attention_splitting.py b/tests/v1/attention/test_attention_splitting.py
new file mode 100644
index 0000000000000..3fc1011d5042e
--- /dev/null
+++ b/tests/v1/attention/test_attention_splitting.py
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from tests.v1.attention.test_attention_backends import BATCH_SPECS
+from tests.v1.attention.utils import create_common_attn_metadata
+from vllm.v1.attention.backends.utils import (UbatchSlice,
+                                              _make_metadata_with_slice,
+                                              slice_query_start_locs,
+                                              split_attn_metadata)
+
+
+@pytest.fixture
+def sample_query_start_loc():
+    """Sample query_start_loc tensor for testing"""
+    return torch.tensor([0, 5, 12, 20, 35, 50])
+
+
+def test_basic_slice_middle(sample_query_start_loc):
+    """Test slicing from middle of tensor"""
+    req_slice = slice(1, 3)  # slice from index 1 to 3
+    result = slice_query_start_locs(sample_query_start_loc, req_slice)
+
+    expected = torch.tensor([0, 7, 15])
+    assert torch.equal(result, expected)
+
+
+def test_slice_from_beginning(sample_query_start_loc):
+    """Test slicing from the beginning of tensor"""
+    req_slice = slice(0, 2)  # slice from index 0 to 2
+    result = slice_query_start_locs(sample_query_start_loc, req_slice)
+
+    expected = torch.tensor([0, 5, 12])
+    assert torch.equal(result, expected)
+
+
+def test_slice_to_end(sample_query_start_loc):
+    """Test slicing to the end of tensor"""
+    req_slice = slice(3, 5)  # slice from index 3 to 5 (last index)
+    result = slice_query_start_locs(sample_query_start_loc, req_slice)
+
+    expected = torch.tensor([0, 15, 30])
+    assert torch.equal(result, expected)
+
+
+def test_single_element_slice(sample_query_start_loc):
+    """Test slice that selects a single request"""
+    req_slice = slice(2, 3)  # slice from index 2 to 3
+    result = slice_query_start_locs(sample_query_start_loc, req_slice)
+
+    expected = torch.tensor([0, 8])
+    assert torch.equal(result, expected)
+
+
+def test_full_tensor_slice(sample_query_start_loc):
+    """Test slicing the entire tensor"""
+    req_slice = slice(0, 5)  # slice entire tensor
+    result = slice_query_start_locs(sample_query_start_loc, req_slice)
+
+    expected = torch.tensor([0, 5, 12, 20, 35, 50])
+    assert torch.equal(result, expected)
+
+
+def test_slice_bounds_edge_cases(sample_query_start_loc):
+    # Test slice that goes exactly to the last element
+    req_slice = slice(4, 5)  # Last index
+    result = slice_query_start_locs(sample_query_start_loc, req_slice)
+
+    expected = torch.tensor([0, 15])
+    assert torch.equal(result, expected)
+
+
+@pytest.fixture
+def small_decode_metadata():
+    """Create metadata for small decode batch"""
+    batch_spec = BATCH_SPECS["small_decode"]
+    device = torch.device("cpu")
+    return create_common_attn_metadata(batch_spec,
+                                       block_size=16,
+                                       device=device)
+
+
+@pytest.fixture
+def large_decode_metadata():
+    """Create metadata for large decode batch"""
+    batch_spec = BATCH_SPECS["large_decode"]
+    device = torch.device("cpu")
+    return create_common_attn_metadata(batch_spec,
+                                       block_size=16,
+                                       device=device)
+
+
+@pytest.fixture
+def mixed_small_metadata():
+    """Create metadata for mixed small batch"""
+    batch_spec = BATCH_SPECS["mixed_small"]
+    device = torch.device("cpu")
+    return create_common_attn_metadata(batch_spec,
+                                       block_size=16,
+                                       device=device)
+
+
+# Tests for _make_metadata_with_slice
+def test_make_metadata_with_slice_decode_batch(small_decode_metadata):
+    """Test slicing decode batch metadata"""
+    # Split first request only
+    ubatch_slice = UbatchSlice(slice(0, 1), slice(0, 1))
+
+    result = _make_metadata_with_slice(ubatch_slice, small_decode_metadata)
+
+    # Check sliced results
+    assert result.num_reqs == 1  # slice(0, 1) gives 1 request
+    assert result.num_actual_tokens == 1  # slice(0, 1) gives 1 token
+    assert result.max_query_len == 1
+    assert torch.equal(result.query_start_loc, torch.tensor([0, 1]))
+    assert torch.equal(result.seq_lens, torch.tensor([32]))
+
+
+def test_make_metadata_with_slice_mixed_batch(mixed_small_metadata):
+    """Test slicing mixed batch metadata"""
+    ubatch_slice = UbatchSlice(slice(1, 3),
+                               slice(1, 7))  # Requests 1-2, tokens 1-6
+
+    result = _make_metadata_with_slice(ubatch_slice, mixed_small_metadata)
+
+    assert result.num_reqs == 2  # slice(1, 3) gives 2 requests
+    assert result.num_actual_tokens == 6  # slice(1, 7) gives 6 tokens
+    assert result.max_query_len == 5
+    assert torch.equal(result.query_start_loc, torch.tensor([0, 1, 6]))
+    assert torch.equal(result.seq_lens, torch.tensor([40, 48]))
+
+
+def test_split_attn_metadata_decode_batch(large_decode_metadata):
+    """Test splitting decode batch into two equal parts"""
+    num_tokens = large_decode_metadata.num_reqs
+    mid_point = num_tokens // 2
+    ubatch_slices = [
+        UbatchSlice(slice(0, mid_point), slice(0, mid_point)),
+        UbatchSlice(slice(mid_point, num_tokens), slice(mid_point,
+                                                        num_tokens)),
+    ]
+
+    results = split_attn_metadata(ubatch_slices, large_decode_metadata)
+
+    assert len(results) == 2
+
+    # Check first split
+    assert results[0].num_reqs == mid_point
+    assert results[0].num_actual_tokens == mid_point
+    assert torch.equal(results[0].seq_lens, torch.tensor([2048] * mid_point))
+
+    # Check second split
+    assert results[1].num_reqs == mid_point
+    assert results[1].num_actual_tokens == mid_point
+    assert torch.equal(results[1].seq_lens, torch.tensor([2048] * mid_point))
diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index d39cc0a39f45c..0f041573e9d20 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -63,6 +63,89 @@ class CommonAttentionMetadata:
     causal: bool = True
 
 
+@dataclass
+class UbatchSlice:  # request/token ranges selecting part of a batch
+    request_slice: slice  # indexes per-request tensors (e.g. seq_lens)
+    token_slice: slice  # indexes per-token tensors (e.g. slot_mapping)
+
+
+def slice_query_start_locs(
+    query_start_loc: torch.Tensor,
+    request_slice: slice,
+) -> torch.Tensor:
+    """
+    Return the query start offsets of the requests in request_slice,
+    shifted so that the first selected request starts at 0.
+
+    Note: the subtraction produces a new tensor for the shifted offsets.
+    This will break cudagraph compatibility.
+    """
+    first, last = request_slice.start, request_slice.stop
+    return query_start_loc[first:last + 1] - query_start_loc[first]
+
+
+def _make_metadata_with_slice(
+        ubatch_slice: UbatchSlice,
+        attn_metadata: CommonAttentionMetadata) -> CommonAttentionMetadata:
+    """
+    Build a CommonAttentionMetadata that covers only the requests and tokens
+    selected by ubatch_slice. Query start offsets are rebased to begin at 0;
+    attn_metadata itself is only read, never modified.
+    """
+    request_slice = ubatch_slice.request_slice
+    token_slice = ubatch_slice.token_slice
+
+    query_start_loc = slice_query_start_locs(attn_metadata.query_start_loc,
+                                             request_slice)
+    assert len(query_start_loc) >= 2, "slice must cover at least one request"
+    query_start_loc_cpu = slice_query_start_locs(
+        attn_metadata.query_start_loc_cpu, request_slice)
+
+    seq_lens = attn_metadata.seq_lens[request_slice]
+    seq_lens_cpu = attn_metadata.seq_lens_cpu[request_slice]
+    num_computed_tokens_cpu = attn_metadata.num_computed_tokens_cpu[
+        request_slice]
+
+    num_requests = request_slice.stop - request_slice.start
+    num_actual_tokens = token_slice.stop - token_slice.start
+    # Longest query is the largest gap between consecutive start offsets.
+    max_query_len = int(
+        torch.max(torch.abs(query_start_loc_cpu[1:] -
+                            query_start_loc_cpu[:-1])).item())
+
+    block_table_tensor = attn_metadata.block_table_tensor[request_slice]
+    slot_mapping = attn_metadata.slot_mapping[token_slice]
+    return CommonAttentionMetadata(
+        query_start_loc=query_start_loc,
+        query_start_loc_cpu=query_start_loc_cpu,
+        seq_lens=seq_lens,
+        seq_lens_cpu=seq_lens_cpu,
+        num_computed_tokens_cpu=num_computed_tokens_cpu,
+        num_reqs=num_requests,
+        num_actual_tokens=num_actual_tokens,
+        max_query_len=max_query_len,
+        block_table_tensor=block_table_tensor,
+        slot_mapping=slot_mapping,
+    )
+
+
+def split_attn_metadata(
+    ubatch_slices: list[UbatchSlice],
+    common_attn_metadata: CommonAttentionMetadata,
+) -> list[CommonAttentionMetadata]:
+    """
+    Build one CommonAttentionMetadata per entry of ubatch_slices, each
+    restricted to that slice's requests and tokens. The returned list has
+    the same order as ubatch_slices.
+
+    Note: common_attn_metadata is left unmodified.
+    """
+    return [
+        _make_metadata_with_slice(piece, common_attn_metadata)
+        for piece in ubatch_slices
+    ]
+
+
 M = TypeVar("M")
 
 

From d3a6f2120bb6b67fc58a3f1000d624cfb351eb05 Mon Sep 17 00:00:00 2001
From: vllmellm <vllm.ellm@embeddedllm.com>
Date: Sat, 2 Aug 2025 14:53:18 +0800
Subject: [PATCH 155/224] [FEAT][ROCm] Enable running Flash Attention as ViT
 attn backend for Qwen-VL models on ROCm platform. (#22069)

Signed-off-by: tjtanaavllm <tunjian.tan@amd.com>
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
Co-authored-by: tjtanaavllm <tunjian.tan@amd.com>
---
 vllm/model_executor/models/qwen2_5_vl.py | 18 ++++++++----
 vllm/model_executor/models/qwen2_vl.py   | 18 ++++++++----
 vllm/model_executor/models/vision.py     | 36 +++++-------------------
 vllm/platforms/cuda.py                   | 14 +++++++++
 vllm/platforms/interface.py              |  5 ++++
 vllm/platforms/rocm.py                   | 12 ++++++++
 6 files changed, 64 insertions(+), 39 deletions(-)

diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 04e64422d2e0b..45fb7f9580ae4 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -246,11 +246,15 @@ class Qwen2_5_VisionAttention(nn.Module):
         # Detect attention implementation.
         self.attn_backend: _Backend = get_vit_attn_backend(support_fa=True)
         if self.attn_backend not in {
-                _Backend.FLASH_ATTN, _Backend.TORCH_SDPA, _Backend.XFORMERS
+                _Backend.FLASH_ATTN, _Backend.TORCH_SDPA, _Backend.XFORMERS,
+                _Backend.ROCM_AITER_FA
         }:
             raise RuntimeError(
                 f"Qwen2.5-VL does not support {self.attn_backend} backend now."
             )
+        self.is_flash_attn_backend = self.attn_backend in {
+            _Backend.FLASH_ATTN, _Backend.ROCM_AITER_FA
+        }
 
     def split_qkv(self, qkv: torch.Tensor) -> tuple[torch.Tensor, ...]:
         # [s, b, 3 * head * head_dim]
@@ -297,10 +301,13 @@ class Qwen2_5_VisionAttention(nn.Module):
             q = apply_rotary_pos_emb_vision(q, rotary_pos_emb)
             k = apply_rotary_pos_emb_vision(k, rotary_pos_emb)
 
-        if self.attn_backend == _Backend.FLASH_ATTN:
+        if self.is_flash_attn_backend:
             # from vllm_flash_attn.flash_attn_interface import (
             #   flash_attn_varlen_func)
-            from flash_attn import flash_attn_varlen_func
+            if self.attn_backend == _Backend.ROCM_AITER_FA:
+                from aiter import flash_attn_varlen_func
+            else:
+                from flash_attn import flash_attn_varlen_func
 
             q, k, v = (rearrange(x, "b s ... -> (b s) ...") for x in [q, k, v])
 
@@ -311,7 +318,7 @@ class Qwen2_5_VisionAttention(nn.Module):
                                             cu_seqlens_k=cu_seqlens,
                                             max_seqlen_q=max_seqlen,
                                             max_seqlen_k=max_seqlen,
-                                            dropout_p=0,
+                                            dropout_p=0.0,
                                             causal=False)
 
             context_layer = rearrange(output,
@@ -635,7 +642,8 @@ class Qwen2_5_VisionTransformer(nn.Module):
         cu_seqlens: torch.Tensor,
     ) -> tuple[Optional[int], Optional[list[int]]]:
         max_seqlen, seqlens = None, None
-        if self.attn_backend == _Backend.FLASH_ATTN:
+        if (self.attn_backend == _Backend.FLASH_ATTN
+                or self.attn_backend == _Backend.ROCM_AITER_FA):
             max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max().item()
         elif self.attn_backend == _Backend.XFORMERS:
             seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index 4e8ea8e449133..40d77312b72c2 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -274,10 +274,14 @@ class Qwen2VisionAttention(nn.Module):
         # Detect attention implementation.
         self.attn_backend: _Backend = get_vit_attn_backend(support_fa=True)
         if self.attn_backend not in {
-                _Backend.FLASH_ATTN, _Backend.TORCH_SDPA, _Backend.XFORMERS
+                _Backend.FLASH_ATTN, _Backend.TORCH_SDPA, _Backend.XFORMERS,
+                _Backend.ROCM_AITER_FA
         }:
             raise RuntimeError(
                 f"Qwen2-VL does not support {self.attn_backend} backend now.")
+        self.is_flash_attn_backend = self.attn_backend in {
+            _Backend.FLASH_ATTN, _Backend.ROCM_AITER_FA
+        }
 
     def split_qkv(self, qkv: torch.Tensor) -> tuple[torch.Tensor, ...]:
         # [s, b, 3 * head * head_dim]
@@ -324,10 +328,13 @@ class Qwen2VisionAttention(nn.Module):
             q = apply_rotary_pos_emb_vision(q, rotary_pos_emb)
             k = apply_rotary_pos_emb_vision(k, rotary_pos_emb)
 
-        if self.attn_backend == _Backend.FLASH_ATTN:
+        if self.is_flash_attn_backend:
             # from vllm_flash_attn.flash_attn_interface import (
             #   flash_attn_varlen_func)
-            from flash_attn import flash_attn_varlen_func
+            if self.attn_backend == _Backend.ROCM_AITER_FA:
+                from aiter import flash_attn_varlen_func
+            else:
+                from flash_attn import flash_attn_varlen_func
 
             q, k, v = (rearrange(x, "b s ... -> (b s) ...") for x in [q, k, v])
 
@@ -338,7 +345,7 @@ class Qwen2VisionAttention(nn.Module):
                                             cu_seqlens_k=cu_seqlens,
                                             max_seqlen_q=max_seqlen,
                                             max_seqlen_k=max_seqlen,
-                                            dropout_p=0,
+                                            dropout_p=0.0,
                                             causal=False)
 
             context_layer = rearrange(output,
@@ -620,7 +627,8 @@ class Qwen2VisionTransformer(nn.Module):
             self, cu_seqlens: torch.Tensor
     ) -> tuple[Optional[int], Optional[list[int]]]:
         max_seqlen, seqlens = None, None
-        if self.attn_backend == _Backend.FLASH_ATTN:
+        if (self.attn_backend == _Backend.FLASH_ATTN
+                or self.attn_backend == _Backend.ROCM_AITER_FA):
             max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max().item()
         elif self.attn_backend == _Backend.XFORMERS:
             seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
diff --git a/vllm/model_executor/models/vision.py b/vllm/model_executor/models/vision.py
index ac6a659bbaa32..de30509b1ccb4 100644
--- a/vllm/model_executor/models/vision.py
+++ b/vllm/model_executor/models/vision.py
@@ -7,9 +7,7 @@ from typing import Final, Generic, Optional, Protocol, TypeVar, Union
 import torch
 from transformers import PretrainedConfig
 
-import vllm.envs as envs
-from vllm.attention.selector import (backend_name_to_enum,
-                                     get_global_forced_attn_backend)
+from vllm.attention.selector import get_env_variable_attn_backend
 from vllm.logger import init_logger
 from vllm.platforms import _Backend, current_platform
 
@@ -75,32 +73,12 @@ def get_vit_attn_backend(support_fa: bool = False) -> _Backend:
     Get the available attention backend for Vision Transformer.
     """
     # TODO(Isotr0py): Remove `support_fa` after support FA for all ViTs attn.
-    selected_backend: Optional[_Backend] = get_global_forced_attn_backend()
-    if selected_backend is None:
-        backend_by_env_var: Optional[str] = envs.VLLM_ATTENTION_BACKEND
-        if backend_by_env_var is not None:
-            selected_backend = backend_name_to_enum(backend_by_env_var)
-    if selected_backend is None:
-        if current_platform.is_cuda():
-            device_available = current_platform.has_device_capability(80)
-            if device_available and support_fa:
-                from transformers.utils import is_flash_attn_2_available
-                if is_flash_attn_2_available():
-                    selected_backend = _Backend.FLASH_ATTN
-                else:
-                    logger.warning_once(
-                        "Current `vllm-flash-attn` has a bug inside vision "
-                        "module, so we use xformers backend instead. You can "
-                        "run `pip install flash-attn` to use flash-attention "
-                        "backend.")
-                    selected_backend = _Backend.XFORMERS
-            else:
-                # For Volta and Turing GPUs, use xformers instead.
-                selected_backend = _Backend.XFORMERS
-        else:
-            # Default to torch SDPA for other non-GPU platforms.
-            selected_backend = _Backend.TORCH_SDPA
-    return selected_backend
+
+    selected_backend: Optional[_Backend] = get_env_variable_attn_backend()
+    if selected_backend is not None:
+        return selected_backend
+
+    return current_platform.get_vit_attn_backend(support_fa)
 
 
 def resolve_visual_encoder_outputs(
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 87ff6b385809a..a90910639f784 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -206,6 +206,20 @@ class CudaPlatformBase(Platform):
         torch.cuda.reset_peak_memory_stats(device)
         return torch.cuda.max_memory_allocated(device)
 
+    @classmethod
+    def get_vit_attn_backend(cls, support_fa: bool = False) -> _Backend:
+        if cls.has_device_capability(80) and support_fa:
+            from transformers.utils import is_flash_attn_2_available
+            if is_flash_attn_2_available():
+                return _Backend.FLASH_ATTN
+            logger.warning_once(
+                "Current `vllm-flash-attn` has a bug inside vision "
+                "module, so we use xformers backend instead. You can "
+                "run `pip install flash-attn` to use flash-attention "
+                "backend.")
+        # Fallback: capability < 80, FA not supported, or flash-attn 2 missing
+        return _Backend.XFORMERS
+
     @classmethod
     def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                              kv_cache_dtype, block_size, use_v1,
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 6bae0fe25c797..997aee7063f57 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -46,6 +46,7 @@ class _Backend(enum.Enum):
     ROCM_FLASH = enum.auto()
     ROCM_AITER_MLA = enum.auto()  # Supported by V1
     ROCM_AITER_MLA_VLLM_V1 = enum.auto()
+    ROCM_AITER_FA = enum.auto()  # used for ViT attn backend
     TORCH_SDPA = enum.auto()
     FLASHINFER = enum.auto()
     FLASHINFER_VLLM_V1 = enum.auto()
@@ -186,6 +187,10 @@ class Platform:
         else:
             return device_id
 
+    @classmethod
+    def get_vit_attn_backend(cls, support_fa: bool = False) -> _Backend:
+        return _Backend.TORCH_SDPA  # base default, ignores support_fa
+
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,
                              dtype: torch.dtype, kv_cache_dtype: Optional[str],
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index b2e69f60343f6..54ffc83cd565a 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -173,6 +173,18 @@ class RocmPlatform(Platform):
         "quark", "ptpc_fp8"
     ]
 
+    @classmethod
+    def get_vit_attn_backend(cls, support_fa: bool = False) -> _Backend:
+        if support_fa:
+            if (envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_MHA
+                    and on_gfx9()):
+                # Note: AITER FA is only supported for Qwen-VL models.
+                # TODO: Add support for other VL models in their model class.
+                return _Backend.ROCM_AITER_FA
+            if on_gfx9():
+                return _Backend.FLASH_ATTN
+        return _Backend.TORCH_SDPA  # FA not supported or not on gfx9
+
     @classmethod
     def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                              kv_cache_dtype, block_size, use_v1,

From 4ac8437352a8945262e877d64162d741404768e2 Mon Sep 17 00:00:00 2001
From: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
Date: Fri, 1 Aug 2025 23:54:40 -0700
Subject: [PATCH 156/224] [Misc] Getting and passing ray runtime_env to workers
 (#22040)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
---
 tests/config/test_config_generation.py | 33 ++++++++++++++++++++++++++
 vllm/config.py                         |  5 ++++
 vllm/engine/arg_utils.py               | 11 +++++++++
 vllm/executor/ray_utils.py             |  7 ++++--
 vllm/ray/lazy_utils.py                 | 22 +++++++++++++++++
 vllm/utils/__init__.py                 | 12 +---------
 6 files changed, 77 insertions(+), 13 deletions(-)
 create mode 100644 vllm/ray/lazy_utils.py

diff --git a/tests/config/test_config_generation.py b/tests/config/test_config_generation.py
index 024e81fccc5f1..e37b6b95941e9 100644
--- a/tests/config/test_config_generation.py
+++ b/tests/config/test_config_generation.py
@@ -36,3 +36,36 @@ def test_cuda_empty_vs_unset_configs(monkeypatch: pytest.MonkeyPatch):
     assert deep_compare(normal_config_dict, empty_config_dict), (
         "Configs with normal CUDA_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES=\"\""
         " should be equivalent")
+
+
+def test_ray_runtime_env(monkeypatch: pytest.MonkeyPatch):
+    """Check that EngineArgs picks up the Ray runtime env of a Ray task."""
+
+    # In testing, this method needs to be nested inside as ray does not
+    # see the test module.
+    def create_config():
+        engine_args = EngineArgs(model="deepseek-ai/DeepSeek-V2-Lite",
+                                 trust_remote_code=True)
+        return engine_args.create_engine_config()
+
+    # Before ray.init() no runtime env should be recorded.
+    config = create_config()
+    parallel_config = config.parallel_config
+    assert parallel_config.ray_runtime_env is None
+
+    import ray
+    ray.init()
+    try:
+        runtime_env = {"env_vars": {"TEST_ENV_VAR": "test_value"}}
+        config_ref = ray.remote(create_config).options(
+            runtime_env=runtime_env).remote()
+
+        config = ray.get(config_ref)
+        parallel_config = config.parallel_config
+        assert parallel_config.ray_runtime_env is not None
+        assert parallel_config.ray_runtime_env.env_vars().get(
+            "TEST_ENV_VAR") == "test_value"
+    finally:
+        # Always tear Ray down so a failure here cannot leak an initialized
+        # Ray instance into other tests running in the same process.
+        ray.shutdown()
diff --git a/vllm/config.py b/vllm/config.py
index 95dae4275edf3..ee8f3dd98dd86 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -57,6 +57,7 @@ from vllm.utils import (DEFAULT_MAX_NUM_BATCHED_TOKENS,
 
 if TYPE_CHECKING:
     from _typeshed import DataclassInstance
+    from ray.runtime_env import RuntimeEnv
     from ray.util.placement_group import PlacementGroup
     from transformers.configuration_utils import PretrainedConfig
 
@@ -74,6 +75,7 @@ if TYPE_CHECKING:
 else:
     DataclassInstance = Any
     PlacementGroup = Any
+    RuntimeEnv = Any
     PretrainedConfig = Any
     ExecutorBase = Any
     QuantizationConfig = Any
@@ -2098,6 +2100,9 @@ class ParallelConfig:
     ray_workers_use_nsight: bool = False
     """Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler."""
 
+    ray_runtime_env: Optional["RuntimeEnv"] = None
+    """Ray runtime environment to pass to distributed workers."""
+
     placement_group: Optional["PlacementGroup"] = None
     """ray distributed model workers placement group."""
 
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 0d38b5b5302c1..47b3efa6af726 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -36,6 +36,7 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
+from vllm.ray.lazy_utils import is_ray_initialized
 from vllm.reasoning import ReasoningParserManager
 from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
 from vllm.transformers_utils.utils import check_gguf_file
@@ -1099,6 +1100,15 @@ class EngineArgs:
             kv_sharing_fast_prefill=self.kv_sharing_fast_prefill,
         )
 
+        ray_runtime_env = None
+        if is_ray_initialized():
+            # Ray Serve LLM calls `create_engine_config` in the context
+            # of a Ray task, therefore we check is_ray_initialized()
+            # as opposed to is_in_ray_actor().
+            import ray
+            ray_runtime_env = ray.get_runtime_context().runtime_env
+            logger.info("Using ray runtime env: %s", ray_runtime_env)
+
         # Get the current placement group if Ray is initialized and
         # we are in a Ray actor. If so, then the placement group will be
         # passed to spawned processes.
@@ -1211,6 +1221,7 @@ class EngineArgs:
             max_parallel_loading_workers=self.max_parallel_loading_workers,
             disable_custom_all_reduce=self.disable_custom_all_reduce,
             ray_workers_use_nsight=self.ray_workers_use_nsight,
+            ray_runtime_env=ray_runtime_env,
             placement_group=placement_group,
             distributed_executor_backend=self.distributed_executor_backend,
             worker_cls=self.worker_cls,
diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py
index 033ecc00853ba..7abaffa54c089 100644
--- a/vllm/executor/ray_utils.py
+++ b/vllm/executor/ray_utils.py
@@ -295,9 +295,12 @@ def initialize_ray_cluster(
             logger.warning(
                 "No existing RAY instance detected. "
                 "A new instance will be launched with current node resources.")
-            ray.init(address=ray_address, num_gpus=parallel_config.world_size)
+            ray.init(address=ray_address,
+                     num_gpus=parallel_config.world_size,
+                     runtime_env=parallel_config.ray_runtime_env)
     else:
-        ray.init(address=ray_address)
+        ray.init(address=ray_address,
+                 runtime_env=parallel_config.ray_runtime_env)
 
     device_str = current_platform.ray_device_key
     if not device_str:
diff --git a/vllm/ray/lazy_utils.py b/vllm/ray/lazy_utils.py
new file mode 100644
index 0000000000000..bb3535579cfdf
--- /dev/null
+++ b/vllm/ray/lazy_utils.py
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+def is_ray_initialized() -> bool:
+    """Return True if Ray can be imported and reports being initialized."""
+    try:
+        import ray
+        return ray.is_initialized()
+    except ImportError:
+        return False
+
+
+def is_in_ray_actor() -> bool:
+    """Return True if Ray is initialized and its runtime context has an
+    actor id; return False otherwise, including when Ray cannot be imported."""
+    try:
+        import ray
+        return (ray.is_initialized()
+                and ray.get_runtime_context().get_actor_id() is not None)
+    except ImportError:
+        return False
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 0d3fa6b059beb..3318ae5106377 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -72,6 +72,7 @@ from typing_extensions import Never, ParamSpec, TypeIs, assert_never
 
 import vllm.envs as envs
 from vllm.logger import enable_trace_function_call, init_logger
+from vllm.ray.lazy_utils import is_in_ray_actor
 
 if TYPE_CHECKING:
     from argparse import Namespace
@@ -2835,17 +2836,6 @@ def zmq_socket_ctx(
         ctx.destroy(linger=linger)
 
 
-def is_in_ray_actor():
-    """Check if we are in a Ray actor."""
-
-    try:
-        import ray
-        return (ray.is_initialized()
-                and ray.get_runtime_context().get_actor_id() is not None)
-    except ImportError:
-        return False
-
-
 def _maybe_force_spawn():
     """Check if we need to force the use of the `spawn` multiprocessing start
     method.

From 8564dc9448ed8648088c25248313933308ae36d8 Mon Sep 17 00:00:00 2001
From: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com>
Date: Fri, 1 Aug 2025 23:55:34 -0700
Subject: [PATCH 157/224] Fix test_kv_sharing_fast_prefill flakiness (#22038)

Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
---
 tests/v1/e2e/test_kv_sharing_fast_prefill.py | 35 +++++++++++++++++---
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/tests/v1/e2e/test_kv_sharing_fast_prefill.py b/tests/v1/e2e/test_kv_sharing_fast_prefill.py
index 616fc7a860599..f5a7b9cc276b3 100644
--- a/tests/v1/e2e/test_kv_sharing_fast_prefill.py
+++ b/tests/v1/e2e/test_kv_sharing_fast_prefill.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import gc
 import random
 from typing import Optional, Union
 
@@ -10,6 +9,7 @@ import torch
 
 from vllm import LLM, SamplingParams
 from vllm.config import CompilationConfig, CompilationLevel
+from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.forward_context import get_forward_context
 from vllm.model_executor.models.gemma3n import Gemma3nForConditionalGeneration
 from vllm.model_executor.models.registry import ModelRegistry
@@ -18,6 +18,9 @@ from vllm.sequence import IntermediateTensors
 
 from ...utils import fork_new_process_for_each_test
 
+# global seed
+SEED = 42
+
 
 class TestGemma3nForConditionalGeneration(Gemma3nForConditionalGeneration):
 
@@ -95,8 +98,25 @@ def test_prompts():
     return prompts
 
 
+def cleanup(llm: LLM, compilation_config: CompilationConfig) -> None:
+    # hacky: below lines are required to free up memory for the next test
+    # when setting VLLM_ENABLE_V1_MULTIPROCESSING=0, del llm is not sufficient
+    # TODO(sarckk): when enforce_eager=False, memory is not freed:
+    # find out why and re-enable test for enforce_eager=False case
+    llm_engine = llm.llm_engine.engine_core.engine_core
+    model_runner = llm_engine.model_executor.driver_worker.worker.model_runner
+    del model_runner.model
+    del model_runner.kv_caches
+    del compilation_config.static_forward_context
+    compilation_config.static_forward_context = {}  # reset for the next LLM
+
+    del llm  # only drops this function's local reference
+    torch.cuda.empty_cache()
+    cleanup_dist_env_and_memory()
+
+
 @fork_new_process_for_each_test
-@pytest.mark.parametrize("enforce_eager", [True, False])
+@pytest.mark.parametrize("enforce_eager", [True])
 def test_kv_sharing_fast_prefill(
     monkeypatch: pytest.MonkeyPatch,
     enforce_eager: bool,
@@ -115,23 +135,28 @@ def test_kv_sharing_fast_prefill(
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "1")
 
+        # Make scheduling deterministic for reproducibility
+        m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
+
         llm = LLM(
             model="google/gemma-3n-E2B-it",
             enforce_eager=enforce_eager,
             compilation_config=compilation_config,
+            seed=SEED,
         )
         ref_responses = llm.generate(test_prompts, sampling_params)
 
-        del llm
-        gc.collect()
-        torch.cuda.empty_cache()
+        cleanup(llm, compilation_config)
 
         llm = LLM(model="google/gemma-3n-E2B-it",
                   enforce_eager=enforce_eager,
                   compilation_config=compilation_config,
+                  seed=SEED,
                   kv_sharing_fast_prefill=True)
         optimized_responses = llm.generate(test_prompts, sampling_params)
 
+        cleanup(llm, compilation_config)
+
         misses = 0
 
         for ref_response, optimized_response in zip(ref_responses,

From c64861d63c1a5362bfad443daf7a096f1bcfd1e4 Mon Sep 17 00:00:00 2001
From: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com>
Date: Sat, 2 Aug 2025 02:55:57 -0400
Subject: [PATCH 158/224] [Bugfix] Mamba2 remove bugged initial state condition
 in chunk scan (#22034)

Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com>
---
 .../model_executor/layers/mamba/ops/ssd_chunk_scan.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
index 365e1c54b555a..61eff0c008f60 100644
--- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
+++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
@@ -476,15 +476,8 @@ def _chunk_scan_fwd(
             # with initial states, we need to take care of how
             # seq_idx crosses the boundaries
             assert batch == 1, "chunk scan only supports initial states with batch 1"
-
-            if initial_states.shape[0] == 1:
-                # no in this case no point to use initial states
-                initial_states = None
-            else:
-                assert chunk_indices is not None and chunk_offsets is not None, \
-                    (
-                        "chunk_indices and chunk_offsets should have been set"
-                    )
+            assert chunk_indices is not None and chunk_offsets is not None, \
+                "chunk_indices and chunk_offsets should have been set"
         else:
             chunk_indices, chunk_offsets = None, None
     else:

From 067c34a1559400e956311f067ddd185f54207a2b Mon Sep 17 00:00:00 2001
From: Roger Wang <hey@rogerw.me>
Date: Sat, 2 Aug 2025 00:19:48 -0700
Subject: [PATCH 159/224] docs: remove deprecated disable-log-requests flag
 (#22113)

Signed-off-by: Roger Wang <hey@rogerw.me>
---
 .buildkite/scripts/tpu/run_bm.sh                       |  1 -
 benchmarks/README.md                                   | 10 +++++-----
 benchmarks/auto_tune/auto_tune.sh                      |  1 -
 benchmarks/benchmark_serving.py                        |  3 +--
 benchmarks/benchmark_serving_structured_output.py      |  2 +-
 docs/design/p2p_nccl_connector.md                      |  8 --------
 docs/models/supported_models.md                        |  2 +-
 .../disagg_example_p2p_nccl_xpyd.sh                    |  2 --
 examples/online_serving/prometheus_grafana/README.md   |  3 +--
 .../disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh  |  2 --
 tests/entrypoints/openai/correctness/test_lmeval.py    |  2 +-
 tests/entrypoints/openai/test_chunked_prompt.py        |  2 --
 tests/models/quantization/test_bitsandbytes.py         |  1 -
 .../kv_connector/nixl_integration/run_accuracy_test.sh |  2 --
 .../nixl_integration/run_edge_case_test.sh             |  2 --
 .../nixl_integration/run_tpu_disagg_accuracy_test.sh   |  3 ---
 .../nixl_integration/run_tpu_edge_case_test.sh         |  2 --
 tests/v1/sample/test_logprobs_e2e.py                   |  2 +-
 vllm/utils/__init__.py                                 |  5 +++--
 19 files changed, 14 insertions(+), 41 deletions(-)

diff --git a/.buildkite/scripts/tpu/run_bm.sh b/.buildkite/scripts/tpu/run_bm.sh
index beecaf7a740ae..b1e17b438578d 100755
--- a/.buildkite/scripts/tpu/run_bm.sh
+++ b/.buildkite/scripts/tpu/run_bm.sh
@@ -44,7 +44,6 @@ echo
 
 VLLM_USE_V1=1 vllm serve $MODEL \
  --seed 42 \
- --disable-log-requests \
  --max-num-seqs $MAX_NUM_SEQS \
  --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
  --tensor-parallel-size $TENSOR_PARALLEL_SIZE \
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 644517235b122..d6442a4fc3872 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -91,7 +91,7 @@ become available.
 First start serving your model
 
 ```bash
-vllm serve NousResearch/Hermes-3-Llama-3.1-8B --disable-log-requests
+vllm serve NousResearch/Hermes-3-Llama-3.1-8B
 ```
 
 Then run the benchmarking script
@@ -146,7 +146,7 @@ If the dataset you want to benchmark is not supported yet in vLLM, even then you
 
 ```bash
 # start server
-VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.1-8B-Instruct --disable-log-requests
+VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.1-8B-Instruct
 ```
 
 ```bash
@@ -171,7 +171,7 @@ You can skip applying chat template if your data already has it by using `--cust
 
 ```bash
 # need a model with vision capability here
-vllm serve Qwen/Qwen2-VL-7B-Instruct --disable-log-requests
+vllm serve Qwen/Qwen2-VL-7B-Instruct
 ```
 
 ```bash
@@ -205,7 +205,7 @@ vllm bench serve \
 ### Other HuggingFaceDataset Examples
 
 ```bash
-vllm serve Qwen/Qwen2-VL-7B-Instruct --disable-log-requests
+vllm serve Qwen/Qwen2-VL-7B-Instruct
 ```
 
 `lmms-lab/LLaVA-OneVision-Data`:
@@ -430,7 +430,7 @@ Benchmark the performance of structured output generation (JSON, grammar, regex)
 ### Server Setup
 
 ```bash
-vllm serve NousResearch/Hermes-3-Llama-3.1-8B --disable-log-requests
+vllm serve NousResearch/Hermes-3-Llama-3.1-8B
 ```
 
 ### JSON Schema Benchmark
diff --git a/benchmarks/auto_tune/auto_tune.sh b/benchmarks/auto_tune/auto_tune.sh
index 3cd8580e065dd..df26376504b95 100644
--- a/benchmarks/auto_tune/auto_tune.sh
+++ b/benchmarks/auto_tune/auto_tune.sh
@@ -60,7 +60,6 @@ start_server() {
     pkill -f vllm
 
     VLLM_USE_V1=1 VLLM_SERVER_DEV_MODE=1 VLLM_TORCH_PROFILER_DIR=$profile_dir vllm serve $MODEL \
-        --disable-log-requests \
         --port 8004 \
         --gpu-memory-utilization $gpu_memory_utilization \
         --max-num-seqs $max_num_seqs \
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 3affa18ae3a4f..93b72211eb332 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -5,8 +5,7 @@ r"""Benchmark online serving throughput.
 On the server side, run one of the following commands:
     vLLM OpenAI API server
     vllm serve <your_model> \
-        --swap-space 16 \
-        --disable-log-requests
+        --swap-space 16
 
 On the client side, run:
     python benchmarks/benchmark_serving.py \
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index 2a22f122c78e6..ca6843a72aa36 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -4,7 +4,7 @@ r"""Benchmark online serving throughput with structured outputs.
 
 On the server side, run one of the following commands:
     (vLLM OpenAI API server)
-    vllm serve <your_model> --disable-log-requests
+    vllm serve <your_model>
 
 On the client side, run:
     python benchmarks/benchmark_serving_structured_output.py \
diff --git a/docs/design/p2p_nccl_connector.md b/docs/design/p2p_nccl_connector.md
index 94af8bedd24d2..adf838306bc77 100644
--- a/docs/design/p2p_nccl_connector.md
+++ b/docs/design/p2p_nccl_connector.md
@@ -109,7 +109,6 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.9 \
-        --disable-log-request \
         --kv-transfer-config \
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_buffer_size":"1e1","kv_port":"21001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20001"}}' > /var/vllm.log 2>&1 &
     ```
@@ -131,7 +130,6 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.7 \
-        --disable-log-request \
         --kv-transfer-config \
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_buffer_size":"8e9","kv_port":"22001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20002"}}' > /var/vllm.log 2>&1 &
     ```
@@ -153,7 +151,6 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.7 \
-        --disable-log-request \
         --kv-transfer-config \
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_buffer_size":"8e9","kv_port":"23001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20003"}}' > /var/vllm.log 2>&1 &
     ```
@@ -175,7 +172,6 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.7 \
-        --disable-log-request \
         --kv-transfer-config \
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_buffer_size":"8e9","kv_port":"24001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20004"}}' > /var/vllm.log 2>&1 &
     ```
@@ -206,7 +202,6 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.9 \
-        --disable-log-request \
         --kv-transfer-config \
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_buffer_size":"1e1","kv_port":"21001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20001"}}' > /var/vllm.log 2>&1 &
     ```
@@ -228,7 +223,6 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.9 \
-        --disable-log-request \
         --kv-transfer-config \
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_buffer_size":"1e1","kv_port":"22001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20002"}}' > /var/vllm.log 2>&1 &
     ```
@@ -250,7 +244,6 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.9 \
-        --disable-log-request \
         --kv-transfer-config \
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_buffer_size":"1e1","kv_port":"23001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20003"}}' > /var/vllm.log 2>&1 &
     ```
@@ -272,7 +265,6 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.7 \
-        --disable-log-request \
         --kv-transfer-config \
         '{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_buffer_size":"8e9","kv_port":"24001","kv_connector_extra_config":{"proxy_ip":"10.0.1.1","proxy_port":"30001","http_port":"20004"}}' > /var/vllm.log 2>&1 &
     ```
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 56c77a1e5f118..bd7a57b436213 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -255,7 +255,7 @@ export https_proxy=http://your.proxy.server:port
 https_proxy=http://your.proxy.server:port huggingface-cli download <model_name>
 
 # or use vllm cmd directly
-https_proxy=http://your.proxy.server:port  vllm serve <model_name> --disable-log-requests
+https_proxy=http://your.proxy.server:port  vllm serve <model_name>
 ```
 
 - Set the proxy in Python interpreter:
diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh
index 568f7a43b4962..7b0b12bb34d25 100644
--- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh
+++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh
@@ -178,7 +178,6 @@ main() {
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.9 \
-        --disable-log-request \
         --kv-transfer-config \
         "{\"kv_connector\":\"P2pNcclConnector\",\"kv_role\":\"kv_producer\",\"kv_buffer_size\":\"1e1\",\"kv_port\":\"$kv_port\",\"kv_connector_extra_config\":{\"proxy_ip\":\"0.0.0.0\",\"proxy_port\":\"$PROXY_PORT\",\"http_port\":\"$port\",\"send_type\":\"PUT_ASYNC\",\"nccl_num_channels\":\"16\"}}" > prefill$((i+1)).log 2>&1 &
         PIDS+=($!)
@@ -207,7 +206,6 @@ main() {
         --max-num-seqs 256 \
         --trust-remote-code \
         --gpu-memory-utilization 0.7 \
-        --disable-log-request \
         --kv-transfer-config \
         "{\"kv_connector\":\"P2pNcclConnector\",\"kv_role\":\"kv_consumer\",\"kv_buffer_size\":\"8e9\",\"kv_port\":\"$kv_port\",\"kv_connector_extra_config\":{\"proxy_ip\":\"0.0.0.0\",\"proxy_port\":\"$PROXY_PORT\",\"http_port\":\"$port\",\"send_type\":\"PUT_ASYNC\",\"nccl_num_channels\":\"16\"}}" > decode$((i+1)).log 2>&1 &
         PIDS+=($!)
diff --git a/examples/online_serving/prometheus_grafana/README.md b/examples/online_serving/prometheus_grafana/README.md
index 7c4e649e6d029..5cd4dab5a8fa7 100644
--- a/examples/online_serving/prometheus_grafana/README.md
+++ b/examples/online_serving/prometheus_grafana/README.md
@@ -13,8 +13,7 @@ Prometheus metric logging is enabled by default in the OpenAI-compatible server.
 
 ```bash
 vllm serve mistralai/Mistral-7B-v0.1 \
-    --max-model-len 2048 \
-    --disable-log-requests
+    --max-model-len 2048
 ```
 
 Launch Prometheus and Grafana servers with `docker compose`:
diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
index 5719fa8212923..1284466a45580 100644
--- a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
+++ b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
@@ -28,7 +28,6 @@ if [[ $1 == "prefiller" ]]; then
         CUDA_VISIBLE_DEVICES=0 \
         vllm serve $MODEL \
         --port 8100 \
-        --disable-log-requests \
         --enforce-eager \
         --kv-transfer-config \
         '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_producer","kv_connector_extra_config": {"discard_partial_chunks": false, "lmcache_rpc_port": "producer1"}}'
@@ -46,7 +45,6 @@ elif [[ $1 == "decoder" ]]; then
         CUDA_VISIBLE_DEVICES=1 \
         vllm serve $MODEL \
         --port 8200 \
-        --disable-log-requests \
         --enforce-eager \
         --kv-transfer-config \
         '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_consumer","kv_connector_extra_config": {"discard_partial_chunks": false, "lmcache_rpc_port": "consumer1"}}'
diff --git a/tests/entrypoints/openai/correctness/test_lmeval.py b/tests/entrypoints/openai/correctness/test_lmeval.py
index a07a147cdc2b2..d75731637d282 100644
--- a/tests/entrypoints/openai/correctness/test_lmeval.py
+++ b/tests/entrypoints/openai/correctness/test_lmeval.py
@@ -22,7 +22,7 @@ TASK = "gsm8k"
 FILTER = "exact_match,strict-match"
 RTOL = 0.03
 EXPECTED_VALUE = 0.54
-DEFAULT_ARGS = ["--max-model-len", "4096", "--disable-log-requests"]
+DEFAULT_ARGS = ["--max-model-len", "4096"]
 MORE_ARGS_LIST = [
     [],  # Default
     ["--enable-chunked-prefill"],  # Chunked
diff --git a/tests/entrypoints/openai/test_chunked_prompt.py b/tests/entrypoints/openai/test_chunked_prompt.py
index 3c8ed955a65a2..c8160c5f2d0e3 100644
--- a/tests/entrypoints/openai/test_chunked_prompt.py
+++ b/tests/entrypoints/openai/test_chunked_prompt.py
@@ -26,8 +26,6 @@ def server():
         "--enable-chunked-prefill",
         "--max-num-batched-tokens",
         "1000",
-        # large prompts create a lot of output
-        "--disable-log-requests",
     ]
 
     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 8cb269d7e9496..e0e919b62b217 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -102,7 +102,6 @@ def test_load_tp_4bit_bnb_model(hf_runner, vllm_runner, example_prompts,
 def test_load_pp_4bit_bnb_model(model_name, description) -> None:
     common_args = [
         "--disable-log-stats",
-        "--disable-log-requests",
         "--dtype",
         "bfloat16",
         "--enable-prefix-caching",
diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
index b48655d80eefd..9322410ec99e9 100755
--- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
@@ -88,7 +88,6 @@ run_tests_for_model() {
     BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
     --port $PORT \
     --enforce-eager \
-    --disable-log-requests \
     --gpu-memory-utilization 0.2 \
     --tensor-parallel-size $PREFILLER_TP_SIZE \
     --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"
@@ -121,7 +120,6 @@ run_tests_for_model() {
     BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
     --port $PORT \
     --enforce-eager \
-    --disable-log-requests \
     --gpu-memory-utilization 0.2 \
     --tensor-parallel-size $DECODER_TP_SIZE \
     --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"
diff --git a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
index 98903a176e28b..b64461292910d 100644
--- a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
@@ -57,7 +57,6 @@ run_tests_for_model() {
   BASE_CMD="CUDA_VISIBLE_DEVICES=0 VLLM_NIXL_SIDE_CHANNEL_PORT=5559 vllm serve $model_name \
   --port $PREFILL_PORT \
   --enforce-eager \
-  --disable-log-requests \
   --gpu-memory-utilization 0.2 \
   --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"
 
@@ -76,7 +75,6 @@ run_tests_for_model() {
   BASE_CMD="CUDA_VISIBLE_DEVICES=1 VLLM_NIXL_SIDE_CHANNEL_PORT=6000 vllm serve $model_name \
   --port $DECODE_PORT \
   --enforce-eager \
-  --disable-log-requests \
   --gpu-memory-utilization 0.2 \
   --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"
 
diff --git a/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh
index 45779d16914f0..ea125f99fc42c 100644
--- a/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh
@@ -63,7 +63,6 @@ launch_baseline() {
       --seed 42 \
       --block-size ${BLOCK_SIZE} \
       --gpu-memory-utilization 0.5 \
-      --disable-log-requests \
       --enforce-eager"
   echo ${BASELINE_BASE_CMD}
   ssh -tt ${BASELINE_HOST} "${BASELINE_BASE_CMD}" &
@@ -87,7 +86,6 @@ launch_pd() {
       --block-size ${BLOCK_SIZE} \
       --enforce-eager \
       --gpu-memory-utilization 0.5 \
-      --disable-log-requests \
       --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"cpu\"}'"
 
 
@@ -106,7 +104,6 @@ launch_pd() {
       --block-size ${BLOCK_SIZE} \
       --enforce-eager \
       --gpu-memory-utilization 0.5 \
-      --disable-log-requests \
       --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"cpu\"}'"
 
   echo ${PREFILL_BASE_CMD}
diff --git a/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh
index c37c92fdf5d3f..8ba653770c4f0 100644
--- a/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh
@@ -68,7 +68,6 @@ launch_pd() {
       --block-size ${BLOCK_SIZE} \
       --enforce-eager \
       --gpu-memory-utilization 0.5 \
-      --disable-log-requests \
       --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"cpu\"}'"
 
 
@@ -87,7 +86,6 @@ launch_pd() {
       --block-size ${BLOCK_SIZE} \
       --enforce-eager \
       --gpu-memory-utilization 0.5 \
-      --disable-log-requests \
       --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"cpu\"}'"
 
   echo ${PREFILL_BASE_CMD}
diff --git a/tests/v1/sample/test_logprobs_e2e.py b/tests/v1/sample/test_logprobs_e2e.py
index 50b14a15dc164..7f41355ff7ce4 100644
--- a/tests/v1/sample/test_logprobs_e2e.py
+++ b/tests/v1/sample/test_logprobs_e2e.py
@@ -15,7 +15,7 @@ EXPECTED_VALUE = 0.62
 MODEL = "meta-llama/Llama-3.2-1B-Instruct"
 MODEL_ARGS = f"pretrained={MODEL},enforce_eager=True,enable_prefix_caching=False,gpu_memory_utilization=0.8"  # noqa: E501
 SERVER_ARGS = [
-    "--enforce_eager", "--no_enable_prefix_caching", "--disable-log-requests",
+    "--enforce_eager", "--no_enable_prefix_caching",
     "--gpu-memory-utilization=0.8"
 ]
 NUM_CONCURRENT = 100
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 3318ae5106377..ce62282c2199f 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -1673,8 +1673,9 @@ class FlexibleArgumentParser(ArgumentParser):
                 # Special case warning because the warning below won't trigger
                 # if –-disable-log-requests because its value is default.
                 logger.warning_once(
-                    "argument '--disable-log-requests' is deprecated. This "
-                    "will be removed in v0.12.0.")
+                    "argument '--disable-log-requests' is deprecated and "
+                    "replaced with '--enable-log-requests'. This will be "
+                    "removed in v0.12.0.")
             namespace, args = super().parse_known_args(args, namespace)
             for action in FlexibleArgumentParser._deprecated:
                 if (hasattr(namespace, dest := action.dest)

From 58eee5f2e05b74eb2cb1a3bbda9c04df4805e4cc Mon Sep 17 00:00:00 2001
From: Vadim Gimpelson <156319763+vadiklyutiy@users.noreply.github.com>
Date: Sat, 2 Aug 2025 12:43:52 +0400
Subject: [PATCH 160/224] [PERF] Use faster way of decode in tokenizer: avoid
 useless list-to-list conversion (#20000)

Signed-off-by: Vadim Gimpelson <vadim.gimpelson@centml.ai>
---
 vllm/transformers_utils/tokenizer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 24ddd35abea60..6a31a41980695 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -50,11 +50,12 @@ def decode_tokens(
     `skip_special_tokens=None` means to use the backend's default
     settings.
     """
+    decode_method = getattr(tokenizer, "_decode", tokenizer.decode)
     if skip_special_tokens is not None:
-        return tokenizer.decode(token_ids,
-                                skip_special_tokens=skip_special_tokens)
+        return decode_method(token_ids,
+                             skip_special_tokens=skip_special_tokens)
 
-    return tokenizer.decode(token_ids)
+    return decode_method(token_ids)
 
 
 def encode_tokens(

From 25373b6c6cc2068e3914fa906d3240088f7af157 Mon Sep 17 00:00:00 2001
From: Yuxuan Zhang <2448370773@qq.com>
Date: Sat, 2 Aug 2025 16:46:57 +0800
Subject: [PATCH 161/224] for glm-4.1V update (#22000)

Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
---
 docs/models/supported_models.md               |  3 ++-
 tests/models/registry.py                      | 11 +++++-----
 tests/tool_use/test_glm4_moe_tool_parser.py   |  2 +-
 .../model_executor/layers/rotary_embedding.py |  2 +-
 vllm/model_executor/models/glm4_1v.py         | 21 ++++++++++++-------
 vllm/model_executor/models/registry.py        |  1 +
 6 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index bd7a57b436213..c058c20f1ed73 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -591,7 +591,8 @@ See [this page](generative_models.md) for more information on how to use generat
 | `Gemma3ForConditionalGeneration` | Gemma 3 | T + I<sup>+</sup> | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ | ⚠️ |
 | `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4v_moeForConditionalGeneration` | GLM-4.5V | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5V-Air`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ |
 | `H2OVLChatModel` | H2OVL | T + I<sup>E+</sup> | `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc. | | ✅︎ | ✅︎ |
 | `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |
diff --git a/tests/models/registry.py b/tests/models/registry.py
index fdc7888c85efb..d88d77cddcca5 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -377,9 +377,10 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     "GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b",
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
-    "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking", min_transformers_version="4.53"),  # noqa: E501
-    "Glm4MoeForCausalLM": _HfExamplesInfo("THUDM/GLM-4.5",
-                                          min_transformers_version="4.54",
+    "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking"),  # noqa: E501
+    "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5",
+                                          min_transformers_version="4.54"),   # noqa: E501
+    "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
                                           is_available_online=False),   # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
                                       extras={"2b": "h2oai/h2ovl-mississippi-2b"},  # noqa: E501
@@ -515,8 +516,8 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
                                             is_available_online=False,
                                             speculative_model="openbmb/MiniCPM-2B-sft-bf16",
                                             tokenizer="openbmb/MiniCPM-2B-sft-bf16"),
-    "Glm4MoeMTPModel": _HfExamplesInfo("THUDM/GLM-4.5",
-                                        speculative_model="THUDM/GLM-4.5",
+    "Glm4MoeMTPModel": _HfExamplesInfo("zai-org/GLM-4.5",
+                                        speculative_model="zai-org/GLM-4.5",
                                         min_transformers_version="4.54",
                                         is_available_online=False),
     "MiMoMTPModel": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL",
diff --git a/tests/tool_use/test_glm4_moe_tool_parser.py b/tests/tool_use/test_glm4_moe_tool_parser.py
index 478f4b9166725..91913c933184e 100644
--- a/tests/tool_use/test_glm4_moe_tool_parser.py
+++ b/tests/tool_use/test_glm4_moe_tool_parser.py
@@ -12,7 +12,7 @@ from vllm.transformers_utils.tokenizer import get_tokenizer
 
 pytest.skip("skip glm4_moe parser test", allow_module_level=True)
 # Use a common model that is likely to be available
-MODEL = "THUDM/GLM-4.5"
+MODEL = "zai-org/GLM-4.5"
 
 
 @pytest.fixture(scope="module")
diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py
index dddd4d6a71170..24dd86620fe91 100644
--- a/vllm/model_executor/layers/rotary_embedding.py
+++ b/vllm/model_executor/layers/rotary_embedding.py
@@ -1096,7 +1096,7 @@ class MRotaryEmbedding(RotaryEmbedding):
                 audio_feature_lengths=audio_feature_lengths,
                 use_audio_in_video=use_audio_in_video,
             )
-        elif "glm4v" in hf_config.model_type:
+        elif hf_config.model_type in ["glm4v", "glm4v_moe"]:
             return cls._glm4v_get_input_positions_tensor(
                 input_tokens=input_tokens,
                 hf_config=hf_config,
diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 7c9840790fe3e..7983895687a38 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -37,8 +37,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange
 from transformers import BatchFeature
-from transformers.models.glm4v.configuration_glm4v import (Glm4vConfig,
-                                                           Glm4vVisionConfig)
+from transformers.models.glm4v.configuration_glm4v import Glm4vVisionConfig
 from transformers.models.glm4v.image_processing_glm4v import (
     Glm4vImageProcessor, smart_resize)
 from transformers.models.glm4v.video_processing_glm4v import (
@@ -801,7 +800,7 @@ class Glm4vVisionTransformer(nn.Module):
 class Glm4vProcessingInfo(BaseProcessingInfo):
 
     def get_hf_config(self):
-        return self.ctx.get_hf_config(Glm4vConfig)
+        return self.ctx.get_hf_config()
 
     def get_tokenizer(self):
         return self.ctx.tokenizer
@@ -1253,7 +1252,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
-        config: Glm4vConfig = vllm_config.model_config.hf_config
+        config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         multimodal_config = vllm_config.model_config.multimodal_config
 
@@ -1267,12 +1266,18 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
             prefix=maybe_prefix(prefix, "visual"),
         )
 
+        if config.model_type == "glm4v":
+            architectures = ["Glm4ForCausalLM"]
+        elif config.model_type == "glm4v_moe":
+            architectures = ["Glm4MoeForCausalLM"]
+        else:
+            architectures = None
+
         self.language_model = init_vllm_registered_model(
             vllm_config=vllm_config,
-            prefix=maybe_prefix(prefix, ""),
-            architectures=["Glm4ForCausalLM"],
-            hf_config=self.config.get_text_config(),
-        )
+            hf_config=config.text_config,
+            prefix=maybe_prefix(prefix, "language_model"),
+            architectures=architectures)
 
         self.make_empty_intermediate_tensors = (
             self.language_model.make_empty_intermediate_tensors)
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 0c5d87a7dc472..9b6ab52d86805 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -206,6 +206,7 @@ _MULTIMODAL_MODELS = {
     "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"),  # noqa: E501
     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
+    "Glm4v_moeForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
     "GraniteSpeechForConditionalGeneration": ("granite_speech", "GraniteSpeechForConditionalGeneration"),  # noqa: E501
     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
     "InternVLChatModel": ("internvl", "InternVLChatModel"),

From b690e34824fd5a5c4054a0c0468ebfb6aa1dd215 Mon Sep 17 00:00:00 2001
From: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com>
Date: Sat, 2 Aug 2025 04:59:34 -0400
Subject: [PATCH 162/224] [Model] Mamba2 preallocate SSM output tensor to avoid
 d2d copy overhead (#21075)

Signed-off-by: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com>
Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com>
---
 tests/kernels/mamba/test_mamba_ssm.py         | 74 ++++++++++---------
 tests/kernels/mamba/test_mamba_ssm_ssd.py     | 23 +++---
 .../layers/mamba/mamba_mixer.py               |  6 +-
 .../layers/mamba/mamba_mixer2.py              | 54 ++++++++------
 .../layers/mamba/ops/mamba_ssm.py             | 16 ++--
 .../layers/mamba/ops/ssd_chunk_scan.py        | 19 ++---
 .../layers/mamba/ops/ssd_combined.py          | 28 ++++---
 vllm/model_executor/models/phi4flash.py       |  6 +-
 vllm/model_executor/models/plamo2.py          | 36 +++++----
 9 files changed, 144 insertions(+), 118 deletions(-)

diff --git a/tests/kernels/mamba/test_mamba_ssm.py b/tests/kernels/mamba/test_mamba_ssm.py
index 8dece26ddb29c..4c32ae81b34c5 100644
--- a/tests/kernels/mamba/test_mamba_ssm.py
+++ b/tests/kernels/mamba/test_mamba_ssm.py
@@ -365,6 +365,7 @@ def test_selective_state_update(dim, dstate, has_z, itype):
     batch_size = 1
     state = torch.randn(batch_size, dim, dstate, dtype=itype, device=device)
     x = torch.randn(batch_size, dim, device=device, dtype=itype)
+    out = torch.empty_like(x)
     dt = torch.randn(batch_size, dim, device=device, dtype=itype)
     dt_bias = torch.rand(dim, device=device) - 4.0
     A = -torch.rand(dim, dstate, device=device) - 1.0
@@ -373,16 +374,17 @@ def test_selective_state_update(dim, dstate, has_z, itype):
     D = torch.randn(dim, device=device)
     z = torch.randn_like(x) if has_z else None
     state_ref = state.detach().clone()
-    out = selective_state_update(state,
-                                 x,
-                                 dt,
-                                 A,
-                                 B,
-                                 C,
-                                 D=D,
-                                 z=z,
-                                 dt_bias=dt_bias,
-                                 dt_softplus=True)
+    selective_state_update(state,
+                           x,
+                           dt,
+                           A,
+                           B,
+                           C,
+                           D=D,
+                           z=z,
+                           dt_bias=dt_bias,
+                           dt_softplus=True,
+                           out=out)
     out_ref = selective_state_update_ref(state_ref,
                                          x,
                                          dt,
@@ -581,6 +583,7 @@ def test_selective_state_update_with_batch_indices(with_padding, dim, dstate,
     ],
                                         dim=0)
     x = torch.randn(padded_batch_size, dim, device=device, dtype=itype)
+    out = torch.empty_like(x)
     dt = torch.randn(padded_batch_size, dim, device=device, dtype=itype)
     dt_bias = torch.rand(dim, device=device) - 4.0
     A = -torch.rand(dim, dstate, device=device) - 1.0
@@ -590,18 +593,19 @@ def test_selective_state_update_with_batch_indices(with_padding, dim, dstate,
     z = torch.randn_like(x) if has_z else None
     state_ref = state[state_indices, :].clone()
     state_before = state.clone()
-    out = selective_state_update(state,
-                                 x,
-                                 dt,
-                                 A,
-                                 B,
-                                 C,
-                                 D=D,
-                                 z=z,
-                                 dt_bias=dt_bias,
-                                 dt_softplus=True,
-                                 state_batch_indices=padded_state_indices,
-                                 pad_slot_id=PAD_SLOT_ID)
+    selective_state_update(state,
+                           x,
+                           dt,
+                           A,
+                           B,
+                           C,
+                           D=D,
+                           z=z,
+                           dt_bias=dt_bias,
+                           dt_softplus=True,
+                           state_batch_indices=padded_state_indices,
+                           pad_slot_id=PAD_SLOT_ID,
+                           out=out)
     out_ref = selective_state_update_ref(state_ref,
                                          x[:batch_size],
                                          dt[:batch_size],
@@ -665,6 +669,7 @@ def test_selective_state_update_with_heads_with_batch_indices(
         dtype=torch.int32, device=device)
 
     x = torch.randn(batch_size, nheads, headdim, device=device, dtype=itype)
+    out = torch.empty_like(x)
     if not tie_hdim:
         dt = torch.randn(batch_size,
                          nheads,
@@ -691,18 +696,19 @@ def test_selective_state_update_with_heads_with_batch_indices(
     C = torch.randn(batch_size, ngroups, dstate, device=device)
     z = torch.randn_like(x) if has_z else None
     state_ref = state[state_indices, :].detach().clone()
-    out = selective_state_update(state,
-                                 x,
-                                 dt,
-                                 A,
-                                 B,
-                                 C,
-                                 D=D,
-                                 z=z,
-                                 dt_bias=dt_bias,
-                                 dt_softplus=True,
-                                 state_batch_indices=state_indices,
-                                 pad_slot_id=PAD_SLOT_ID)
+    selective_state_update(state,
+                           x,
+                           dt,
+                           A,
+                           B,
+                           C,
+                           D=D,
+                           z=z,
+                           dt_bias=dt_bias,
+                           dt_softplus=True,
+                           state_batch_indices=state_indices,
+                           pad_slot_id=PAD_SLOT_ID,
+                           out=out)
     out_ref = selective_state_update_ref(state_ref,
                                          x,
                                          dt,
diff --git a/tests/kernels/mamba/test_mamba_ssm_ssd.py b/tests/kernels/mamba/test_mamba_ssm_ssd.py
index 00c1a2911d7db..67b14a7faa89f 100644
--- a/tests/kernels/mamba/test_mamba_ssm_ssd.py
+++ b/tests/kernels/mamba/test_mamba_ssm_ssd.py
@@ -212,15 +212,16 @@ def test_mamba_chunk_scan_single_example(d_head, n_heads, seq_len_chunk_size,
 
     Y_min, final_state_min = ssd_minimal_discrete(X * dt.unsqueeze(-1), A * dt,
                                                   B, C, chunk_size)
-
-    Y, final_state = mamba_chunk_scan_combined(X,
-                                               dt,
-                                               A,
-                                               B,
-                                               C,
-                                               chunk_size,
-                                               D=None,
-                                               return_final_states=True)
+    Y = torch.empty_like(X)
+    final_state = mamba_chunk_scan_combined(X,
+                                            dt,
+                                            A,
+                                            B,
+                                            C,
+                                            chunk_size,
+                                            D=None,
+                                            return_final_states=True,
+                                            out=Y)
 
     # just test the last in sequence
     torch.testing.assert_close(Y[:, -1], Y_min[:, -1], atol=atol, rtol=rtol)
@@ -292,7 +293,8 @@ def test_mamba_chunk_scan_cont_batch(d_head, n_heads, seq_len_chunk_size_cases,
             _query_start_loc_to_chunk_indices_offsets(
                 cu_seqlens, chunk_size, cu_seqlens[-1])
 
-        Y, new_states = mamba_chunk_scan_combined(
+        Y = torch.empty_like(X)
+        new_states = mamba_chunk_scan_combined(
             X,
             dt,
             A,
@@ -306,6 +308,7 @@ def test_mamba_chunk_scan_cont_batch(d_head, n_heads, seq_len_chunk_size_cases,
             chunk_offsets=chunk_offsets,
             return_varlen_states=True,
             initial_states=states,
+            out=Y,
         )
 
         # just test the last in sequence
diff --git a/vllm/model_executor/layers/mamba/mamba_mixer.py b/vllm/model_executor/layers/mamba/mamba_mixer.py
index 796c8d9375727..60cf3e11885a1 100644
--- a/vllm/model_executor/layers/mamba/mamba_mixer.py
+++ b/vllm/model_executor/layers/mamba/mamba_mixer.py
@@ -220,7 +220,8 @@ class MambaMixer(CustomOp):
                 has_initial_state=attn_metadata.context_lens_tensor > 0,
                 query_start_loc=attn_metadata.query_start_loc)
         else:
-            scan_outputs = selective_state_update(
+            scan_outputs = torch.empty_like(hidden_states.transpose(0, 1))
+            selective_state_update(
                 mamba_cache_params.ssm_state,
                 hidden_states.transpose(0, 1),
                 discrete_time_step.transpose(0, 1),
@@ -231,7 +232,8 @@ class MambaMixer(CustomOp):
                 gate.transpose(0, 1),
                 time_proj_bias,
                 dt_softplus=True,
-                state_batch_indices=mamba_cache_params.state_indices_tensor)
+                state_batch_indices=mamba_cache_params.state_indices_tensor,
+                out=scan_outputs)
             scan_outputs = scan_outputs.transpose(0, 1)
 
         # 4. Final linear projection
diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py
index 36edac2375d0e..5ac9a7f9ab3e4 100644
--- a/vllm/model_executor/layers/mamba/mamba_mixer2.py
+++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py
@@ -541,7 +541,6 @@ class MambaMixer2(MambaBase, CustomOp):
         # NOTE: V0 put prefill before decode, v1 puts decode before prefill
         # Separate prefill and decode by splitting varlen input
         # Split along token dimension
-        # NOTE: V0 put prefill before decode, v1 puts decode before prefill
         if envs.VLLM_USE_V1:
             hidden_states_B_C_d, hidden_states_B_C_p = torch.split(
                 hidden_states_B_C[:num_actual_tokens],
@@ -583,7 +582,28 @@ class MambaMixer2(MambaBase, CustomOp):
                                                                1]
                                  if has_prefill else None)
 
-        ssd_output_list = []
+        # Preallocate output tensor to avoid memcpy cost for merging prefill
+        # and decode outputs
+        preallocated_ssm_out = torch.empty(
+            [
+                num_prefill_tokens + num_decodes,
+                (self.num_heads // self.tp_size) * self.head_dim
+            ],
+            dtype=hidden_states.dtype,
+            device=hidden_states.device,
+        )
+        if envs.VLLM_USE_V1:
+            preallocated_ssm_out_d, preallocated_ssm_out_p = torch.split(
+                preallocated_ssm_out,
+                [num_decodes, num_prefill_tokens],
+                dim=0,
+            )
+        else:
+            preallocated_ssm_out_p, preallocated_ssm_out_d = torch.split(
+                preallocated_ssm_out,
+                [num_prefill_tokens, num_decodes],
+                dim=0,
+            )
 
         # Process prefill requests
         if has_prefill:
@@ -623,7 +643,8 @@ class MambaMixer2(MambaBase, CustomOp):
                         has_initial_states_p[:num_prefills, None, None, None],
                         ssm_state[state_indices_tensor_p], 0)
 
-            scan_output, varlen_state = mamba_chunk_scan_combined(
+            # NOTE: final output is an in-place update of out tensor
+            varlen_state = mamba_chunk_scan_combined(
                 hidden_states_p.view(1, num_prefill_tokens,
                                      self.num_heads // self.tp_size,
                                      self.head_dim),
@@ -646,15 +667,14 @@ class MambaMixer2(MambaBase, CustomOp):
                 return_final_states=False,
                 dt_softplus=True,
                 dt_limit=(0.0, float("inf")),
+                out=preallocated_ssm_out_p.view(1, num_prefill_tokens, -1,
+                                                self.head_dim),
             )
 
             # update ssm states
             # - varlen state is a (num_prefills, nheads, headdim, dstate) tensor
             ssm_state[state_indices_tensor_p] = varlen_state
 
-            # - reshape
-            ssd_output_list.append(scan_output.view(num_prefill_tokens, -1))
-
         # Process decode requests
         if has_decode:
             # 2. Convolution sequence transformation
@@ -684,8 +704,8 @@ class MambaMixer2(MambaBase, CustomOp):
             # - the hidden is reshaped into (bs, num_heads, head_dim)
             # - mamba_cache_params.ssm_state's slots will be selected
             #   using state_indices_tensor_d
-
-            hidden_states_d = selective_state_update(
+            # NOTE: final output is an in-place update of out tensor
+            selective_state_update(
                 ssm_state,
                 hidden_states_d,
                 dt_d,
@@ -697,26 +717,16 @@ class MambaMixer2(MambaBase, CustomOp):
                 dt_bias=dt_bias,
                 dt_softplus=True,
                 state_batch_indices=state_indices_tensor_d,
+                out=preallocated_ssm_out_d.view(num_decodes, -1,
+                                                self.head_dim),
             )
 
-            if envs.VLLM_USE_V1:
-                ssd_output_list.insert(
-                    0,
-                    hidden_states_d.view(-1, (self.num_heads // self.tp_size) *
-                                         self.head_dim))
-            else:
-                ssd_output_list.append(
-                    hidden_states_d.view(-1, (self.num_heads // self.tp_size) *
-                                         self.head_dim))
-
-        # Merge prefill and decode outputs before passing to gated MLP
-        hidden_states = torch.vstack(ssd_output_list)
-
         # 4. gated MLP
         # GatedRMSNorm internally applying SiLU to the gate
         # SiLU is applied internally before normalization, unlike standard
         # norm usage
-        hidden_states = self.norm(hidden_states, gate[:num_actual_tokens])
+        hidden_states = self.norm(preallocated_ssm_out,
+                                  gate[:num_actual_tokens])
 
         # 5. Final linear projection
         output[:num_actual_tokens], _ = self.out_proj(hidden_states)
diff --git a/vllm/model_executor/layers/mamba/ops/mamba_ssm.py b/vllm/model_executor/layers/mamba/ops/mamba_ssm.py
index 3f67fc35afdfc..838290a9f5fb2 100644
--- a/vllm/model_executor/layers/mamba/ops/mamba_ssm.py
+++ b/vllm/model_executor/layers/mamba/ops/mamba_ssm.py
@@ -205,7 +205,8 @@ def selective_state_update(state,
                            dt_bias=None,
                            dt_softplus=False,
                            state_batch_indices=None,
-                           pad_slot_id=PAD_SLOT_ID):
+                           pad_slot_id=PAD_SLOT_ID,
+                           out=None):
     """
     Argument:
         state: (batch, dim, dstate) or (batch, nheads, dim, dstate)
@@ -223,10 +224,9 @@ def selective_state_update(state,
             for example: cache_indices = [pad_slot_id, 1, 20, pad_slot_id] 
             in this case, the kernel will not process entries at 
             indices 0 and 3
-    Return:
-        out: (batch, dim) or (batch, nheads, dim)
+        out: Preallocated SSM output tensor with the same shape as x;
+             required (must not be None) and updated in place.
     """
-    has_heads = state.dim() > 3
     if state.dim() == 3:
         state = state.unsqueeze(1)
     if x.dim() == 2:
@@ -245,6 +245,8 @@ def selective_state_update(state,
         z = z.unsqueeze(1)
     if dt_bias is not None and dt_bias.dim() == 1:
         dt_bias = dt_bias.unsqueeze(0)
+    if out.dim() == 2:
+        out = out.unsqueeze(1)
 
     _, nheads, dim, dstate = state.shape
     batch = x.shape[0]
@@ -264,7 +266,8 @@ def selective_state_update(state,
         assert dt_bias.shape == (nheads, dim)
     if state_batch_indices is not None:
         assert state_batch_indices.shape == (batch, )
-    out = torch.empty_like(x)
+    assert out.shape == x.shape
+
     grid = lambda META: (triton.cdiv(dim, META['BLOCK_SIZE_M']), batch, nheads)
     z_strides = ((z.stride(0), z.stride(1), z.stride(2)) if z is not None else
                  (0, 0, 0))
@@ -328,9 +331,6 @@ def selective_state_update(state,
             BLOCK_SIZE_M,
             num_warps=num_warps,
         )
-    if not has_heads:
-        out = out.squeeze(1)
-    return out
 
 
 def selective_scan_fn(u,
diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
index 61eff0c008f60..fc2b3b25fd0a8 100644
--- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
+++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
@@ -454,6 +454,7 @@ def _chunk_scan_fwd(
     chunk_indices=None,
     chunk_offsets=None,
     initial_states=None,
+    out=None,
 ):
     batch, seqlen, nheads, headdim = x.shape
     _, _, nchunks, chunk_size = dt.shape
@@ -483,20 +484,10 @@ def _chunk_scan_fwd(
     else:
         chunk_indices, chunk_offsets = None, None
 
-    # Allocates output.
-    out = torch.empty(batch,
-                      seqlen,
-                      nheads,
-                      headdim,
-                      device=x.device,
-                      dtype=x.dtype)
+    assert out.shape == x.shape
+
     if z is not None:
-        out_x = torch.empty(batch,
-                            seqlen,
-                            nheads,
-                            headdim,
-                            device=x.device,
-                            dtype=x.dtype)
+        out_x = torch.empty_like(x)
         assert out_x.stride() == out.stride()
     else:
         out_x = None
@@ -579,4 +570,4 @@ def _chunk_scan_fwd(
         IS_TRITON_22=TRITON_22,
         HAS_INITSTATES=initial_states is not None,
     )
-    return out, out_x
+    return out_x
diff --git a/vllm/model_executor/layers/mamba/ops/ssd_combined.py b/vllm/model_executor/layers/mamba/ops/ssd_combined.py
index b121275e9eb38..ad2853a3d8a8b 100644
--- a/vllm/model_executor/layers/mamba/ops/ssd_combined.py
+++ b/vllm/model_executor/layers/mamba/ops/ssd_combined.py
@@ -36,7 +36,8 @@ def _mamba_chunk_scan_combined_fwd(x,
                                    chunk_offsets=None,
                                    cu_seqlens=None,
                                    dt_softplus=False,
-                                   dt_limit=(0.0, float("inf"))):
+                                   dt_limit=(0.0, float("inf")),
+                                   out=None):
     batch, seqlen, nheads, headdim = x.shape
     _, _, ngroups, dstate = B.shape
     assert nheads % ngroups == 0
@@ -134,7 +135,7 @@ def _mamba_chunk_scan_combined_fwd(x,
     # - in each (pseudo) chunk, we detect if the previous (pseudo) chunk had
     #   a seq_idx change, in which case we take states information from
     #   init_states.
-    out, out_x = _chunk_scan_fwd(
+    out_x = _chunk_scan_fwd(
         CB,
         x,
         dt,
@@ -147,9 +148,10 @@ def _mamba_chunk_scan_combined_fwd(x,
         chunk_indices=chunk_indices,
         chunk_offsets=chunk_offsets,
         initial_states=initial_states,
+        out=out,
     )
     if cu_seqlens is None:
-        return out, out_x, dt, dA_cumsum, states, final_states
+        return out_x, dt, dA_cumsum, states, final_states
     else:
         assert batch == 1, "passing cu_seqlens to get the varlen states is only supported if batch dimension is 1"
         varlen_states = chunk_state_varlen(
@@ -161,7 +163,7 @@ def _mamba_chunk_scan_combined_fwd(x,
             states.squeeze(0),
             initial_states=initial_states,
         )
-        return out, out_x, dt, dA_cumsum, states, final_states, varlen_states
+        return out_x, dt, dA_cumsum, states, final_states, varlen_states
 
 
 def mamba_chunk_scan_combined(x,
@@ -180,6 +182,7 @@ def mamba_chunk_scan_combined(x,
                               cu_seqlens=None,
                               dt_softplus=False,
                               dt_limit=(0.0, float("inf")),
+                              out=None,
                               return_final_states=False,
                               return_varlen_states=False):
     """
@@ -197,15 +200,14 @@ def mamba_chunk_scan_combined(x,
         seq_idx: (batch, seqlen)
         cu_seqlens: (num_sequences + 1) or None, only used if return_varlen_states is True
         dt_softplus: Whether to apply softplus to dt
-    Return:
-        out: (batch, seqlen, nheads, headdim)
+        out: (batch, seqlen, nheads, headdim), preallocated; written in place
     """
 
     if not return_varlen_states:
         cu_seqlens = None
     else:
         assert cu_seqlens is not None, "cu_seqlens must be provided if return_varlen_states is True"
-    out, out_x, dt_out, dA_cumsum, states, final_states, *rest = _mamba_chunk_scan_combined_fwd(
+    out_x, dt_out, dA_cumsum, states, final_states, *rest = _mamba_chunk_scan_combined_fwd(
         x,
         dt,
         A,
@@ -221,12 +223,14 @@ def mamba_chunk_scan_combined(x,
         chunk_offsets=chunk_offsets,
         cu_seqlens=cu_seqlens,
         dt_softplus=dt_softplus,
-        dt_limit=dt_limit)
+        dt_limit=dt_limit,
+        out=out)
     if not return_varlen_states:
-        return out if not return_final_states else (out, final_states)
+        if not return_final_states:
+            return
+        else:
+            return final_states
     else:
         varlen_states = rest[0]
-        return (out,
-                varlen_states) if not return_final_states else (out,
-                                                                final_states,
+        return (varlen_states) if not return_final_states else (final_states,
                                                                 varlen_states)
diff --git a/vllm/model_executor/models/phi4flash.py b/vllm/model_executor/models/phi4flash.py
index a4ded2b7a3047..1a761d01fc066 100644
--- a/vllm/model_executor/models/phi4flash.py
+++ b/vllm/model_executor/models/phi4flash.py
@@ -387,7 +387,8 @@ class Phi4Mamba(nn.Module):
                 has_initial_state=attn_metadata.context_lens_tensor > 0,
                 query_start_loc=attn_metadata.query_start_loc)
         else:
-            scan_outputs = selective_state_update(
+            scan_outputs = torch.empty_like(hidden_states.transpose(0, 1))
+            selective_state_update(
                 mamba_cache_params.ssm_state,
                 hidden_states.transpose(0, 1),
                 discrete_time_step.transpose(0, 1),
@@ -400,7 +401,8 @@ class Phi4Mamba(nn.Module):
                 None if self.yoco_kv else gate.transpose(0, 1),
                 time_proj_bias,
                 dt_softplus=True,
-                state_batch_indices=mamba_cache_params.state_indices_tensor)
+                state_batch_indices=mamba_cache_params.state_indices_tensor,
+                out=scan_outputs)
             scan_outputs = scan_outputs.transpose(0, 1)
 
         # 4. Final linear projection
diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py
index 9bc577cfe3a3e..8b1df66f02805 100644
--- a/vllm/model_executor/models/plamo2.py
+++ b/vllm/model_executor/models/plamo2.py
@@ -257,7 +257,21 @@ class Plamo2MambaMixer(nn.Module):
         query_start_loc_p = (attn_metadata.query_start_loc[:num_prefills + 1]
                              if has_prefill else None)
 
-        ssd_output_list = []
+        # Preallocate output tensor to avoid memcpy cost for merging prefill
+        # and decode outputs
+        preallocated_ssm_out = torch.empty(
+            [
+                num_prefill_tokens + num_decodes,
+                (self.num_heads // self.tp_size) * self.head_dim
+            ],
+            dtype=hidden_states.dtype,
+            device=hidden_states.device,
+        )
+        preallocated_ssm_out_p, preallocated_ssm_out_d = torch.split(
+            preallocated_ssm_out,
+            [num_prefill_tokens, num_decodes],
+            dim=0,
+        )
 
         # Process prefill requests
         if has_prefill:
@@ -290,7 +304,7 @@ class Plamo2MambaMixer(nn.Module):
                 initial_states = torch.where(
                     mamba2_metadata.has_initial_states[:, None, None, None],
                     mamba_cache_params.ssm_state[state_indices_tensor_p], 0)
-            scan_output, varlen_state = mamba_chunk_scan_combined(
+            varlen_state = mamba_chunk_scan_combined(
                 hidden_states_p.view(1, num_prefill_tokens,
                                      self.num_heads // self.tp_size,
                                      self.head_dim),
@@ -312,15 +326,14 @@ class Plamo2MambaMixer(nn.Module):
                 return_final_states=False,
                 dt_softplus=True,
                 dt_limit=(0.0, float("inf")),
+                out=preallocated_ssm_out_p.view(1, num_prefill_tokens, -1,
+                                                self.head_dim),
             )
 
             # update ssm states
             # - varlen state is a (batch, nheads, headdim, dstate) tensor
             mamba_cache_params.ssm_state[state_indices_tensor_p] = varlen_state
 
-            # - reshape
-            ssd_output_list.append(scan_output.view(num_prefill_tokens, -1))
-
         # Process decode requests
         if has_decode:
             # 2. Convolution sequence transformation
@@ -349,8 +362,7 @@ class Plamo2MambaMixer(nn.Module):
             # - the hidden is reshaped into (bs, num_heads, head_dim)
             # - mamba_cache_params.ssm_state's slots will be selected
             #   using state_indices_tensor_d
-
-            hidden_states_d = selective_state_update(
+            selective_state_update(
                 mamba_cache_params.ssm_state,
                 hidden_states_d,
                 dt,
@@ -362,17 +374,13 @@ class Plamo2MambaMixer(nn.Module):
                 dt_bias=dt_bias,
                 dt_softplus=True,
                 state_batch_indices=state_indices_tensor_d,
+                out=preallocated_ssm_out_d.view(num_decodes, -1,
+                                                self.head_dim),
             )
             assert self.num_heads % self.tp_size == 0
-            ssd_output_list.append(
-                hidden_states_d.view(-1, (self.num_heads // self.tp_size) *
-                                     self.head_dim))
-
-        # Merge prefill and decode outputs before passing to MLP
-        hidden_states = torch.vstack(ssd_output_list)
 
         # 4. Final linear projection
-        out = self.out_proj(hidden_states)
+        out = self.out_proj(preallocated_ssm_out)
         return out
 
 

From f5d0f4784fdd93f1032f3bb81220af10d7588f5a Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Sat, 2 Aug 2025 17:20:38 +0800
Subject: [PATCH 163/224] [Frontend] Improve error message for too many mm
 items (#22114)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 tests/entrypoints/test_chat_utils.py | 10 ++----
 tests/multimodal/test_processing.py  | 10 +++---
 vllm/entrypoints/chat_utils.py       | 27 ++++++--------
 vllm/multimodal/processing.py        | 54 ++++++++++++++++++----------
 vllm/multimodal/profiling.py         |  2 +-
 5 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 54daf1a91d645..647f1c7b7f34f 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -579,10 +579,7 @@ def test_parse_chat_messages_rejects_too_many_images_in_one_message(
         warnings.filterwarnings(
             "ignore",
             message="coroutine 'async_get_and_parse_image' was never awaited")
-        with pytest.raises(
-                ValueError,
-                match="At most 2 image\\(s\\) may be provided in one request\\."
-        ):
+        with pytest.raises(ValueError, match="At most"):
             parse_chat_messages(
                 [{
                     "role":
@@ -622,10 +619,7 @@ def test_parse_chat_messages_rejects_too_many_images_across_messages(
         warnings.filterwarnings(
             "ignore",
             message="coroutine 'async_get_and_parse_image' was never awaited")
-        with pytest.raises(
-                ValueError,
-                match="At most 2 image\\(s\\) may be provided in one request\\."
-        ):
+        with pytest.raises(ValueError, match="At most"):
             parse_chat_messages(
                 [{
                     "role":
diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py
index 659ee9af9ddec..508c773b8aedf 100644
--- a/tests/multimodal/test_processing.py
+++ b/tests/multimodal/test_processing.py
@@ -3,7 +3,6 @@
 
 from contextlib import nullcontext
 from typing import Optional, cast
-from unittest.mock import MagicMock
 
 import numpy as np
 import pytest
@@ -957,15 +956,14 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
     )
 
     processor = MULTIMODAL_REGISTRY.create_processor(model_config)
-    profiler = MultiModalProfiler(processor)
+    processor._supported_mm_limits = {"image": num_supported}
 
-    mock_supported_mm_limits = MagicMock(return_value={"image": num_supported})
-    processor.info.get_supported_mm_limits = mock_supported_mm_limits
+    profiler = MultiModalProfiler(processor)
 
     if is_valid:
         exc_ctx = nullcontext()
     else:
-        exc_ctx = pytest.raises(ValueError, match="The model only supports")
+        exc_ctx = pytest.raises(ValueError, match="At most")
 
     with exc_ctx:
         profiler.get_decoder_dummy_data(
@@ -1002,7 +1000,7 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
     if is_valid:
         exc_ctx = nullcontext()
     else:
-        exc_ctx = pytest.raises(ValueError, match=f"passed {num_images} image")
+        exc_ctx = pytest.raises(ValueError, match="At most")
 
     with exc_ctx:
         processor.apply(
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 6485ed6b148b4..a658d97cc8c5e 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -535,9 +535,10 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
         return self._model_config
 
     @cached_property
-    def model_cls(self):
+    def model_cls(self) -> type[SupportsMultiModal]:
         from vllm.model_executor.model_loader import get_model_cls
-        return get_model_cls(self.model_config)
+        model_cls = get_model_cls(self.model_config)
+        return cast(type[SupportsMultiModal], model_cls)
 
     @property
     def allowed_local_media_path(self):
@@ -547,31 +548,23 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
     def mm_registry(self):
         return MULTIMODAL_REGISTRY
 
+    @cached_property
+    def mm_processor(self):
+        return self.mm_registry.create_processor(self.model_config)
+
     def add(self, modality: ModalityStr, item: _T) -> Optional[str]:
         """
         Add a multi-modal item to the current prompt and returns the
         placeholder string to use, if any.
         """
-        mm_registry = self.mm_registry
-        model_config = self.model_config
-        model_cls = cast(SupportsMultiModal, self.model_cls)
-
         input_modality = modality.replace("_embeds", "")
+        num_items = len(self._items_by_modality[modality]) + 1
 
-        mm_processor = mm_registry.create_processor(model_config)
-        allowed_counts = mm_processor.info.get_allowed_mm_limits()
-        allowed_count = allowed_counts.get(input_modality, 0)
-
-        current_count = len(self._items_by_modality[modality]) + 1
-        if current_count > allowed_count:
-            raise ValueError(
-                f"At most {allowed_count} {modality}(s) may be provided in "
-                "one request. You can set `--limit-mm-per-prompt` to "
-                "increase this limit if the model supports it.")
+        self.mm_processor.validate_num_items(input_modality, num_items)
 
         self._items_by_modality[modality].append(item)
 
-        return model_cls.get_placeholder_str(modality, current_count)
+        return self.model_cls.get_placeholder_str(modality, num_items)
 
     @abstractmethod
     def create_parser(self) -> "BaseMultiModalContentParser":
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index 78d244a6b4fc8..46240855d12a2 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import json
 import sys
 from abc import ABC, abstractmethod
 from collections import defaultdict
@@ -1156,6 +1155,18 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
 
         self.data_parser = self._get_data_parser()
 
+        # Avoid unnecessary recomputation
+        self._supported_mm_limits = self.info.get_supported_mm_limits()
+        self._allowed_mm_limits = self.info.get_allowed_mm_limits()
+
+    @property
+    def supported_mm_limits(self):
+        return self._supported_mm_limits
+
+    @property
+    def allowed_mm_limits(self):
+        return self._allowed_mm_limits
+
     def __call__(
         self,
         prompt: str,
@@ -1176,6 +1187,28 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         """
         return MultiModalDataParser()
 
+    def validate_num_items(
+        self,
+        modality: str,
+        num_items: int,
+    ) -> None:
+        supported_limit = self.supported_mm_limits.get(modality, 0)
+        allowed_limit = self.allowed_mm_limits.get(modality, 0)
+
+        if supported_limit is None:
+            supported_limit = allowed_limit
+
+        limit = min(supported_limit, allowed_limit)
+
+        if num_items > limit:
+            msg = (f"At most {limit} {modality}(s) may be provided in "
+                   "one prompt.")
+
+            if num_items <= supported_limit:
+                msg += " Set `--limit-mm-per-prompt` to increase this limit."
+
+            raise ValueError(msg)
+
     def _to_mm_items(
         self,
         mm_data: MultiModalDataDict,
@@ -1188,26 +1221,9 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         [`_get_hf_mm_data`][vllm.multimodal.processing.BaseMultiModalProcessor._get_hf_mm_data].
         """
         mm_items = self.data_parser.parse_mm_data(mm_data)
-        supported_mm_limits = self.info.get_supported_mm_limits()
-        allowed_mm_limits = self.info.get_allowed_mm_limits()
 
         for modality, items in mm_items.items():
-            supported_limit = supported_mm_limits.get(modality, 0)
-            allowed_limit = allowed_mm_limits.get(modality, 0)
-            num_items = len(items)
-
-            if supported_limit is not None and num_items > supported_limit:
-                raise ValueError(
-                    f"The model only supports at most {supported_limit} "
-                    f"{modality} items, but you passed {num_items} "
-                    f"{modality} items in the same prompt.")
-
-            if num_items > allowed_limit:
-                raise ValueError(
-                    "You set or defaulted to "
-                    f"'{json.dumps({modality: allowed_limit})}' in "
-                    f"`--limit-mm-per-prompt`, but passed {num_items} "
-                    f"{modality} items in the same prompt.")
+            self.validate_num_items(modality, len(items))
 
         return mm_items
 
diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py
index d96803b643ff2..d876887fc155d 100644
--- a/vllm/multimodal/profiling.py
+++ b/vllm/multimodal/profiling.py
@@ -156,7 +156,7 @@ class MultiModalProfiler(Generic[_I]):
         return self.processor.dummy_inputs
 
     def get_mm_limits(self) -> Mapping[str, int]:
-        return self.processing_info.get_allowed_mm_limits()
+        return self.processor.allowed_mm_limits
 
     def _get_dummy_mm_inputs(
         self,

From 4abfd8796f37adc8fccc9481f37f20de1bce62e4 Mon Sep 17 00:00:00 2001
From: Thomas Parnell <tpa@zurich.ibm.com>
Date: Sat, 2 Aug 2025 14:29:40 +0200
Subject: [PATCH 164/224] [V1] [Hybrid] Validate compatibility of attention
 backend batch reordering at init time (#21557)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
---
 vllm/v1/attention/backends/flashinfer.py    | 28 +++++-------
 vllm/v1/attention/backends/mamba_attn.py    | 20 +++------
 vllm/v1/attention/backends/mla/common.py    | 22 +++------
 vllm/v1/attention/backends/rocm_aiter_fa.py |  3 --
 vllm/v1/attention/backends/utils.py         | 12 ++---
 vllm/v1/worker/cpu_model_runner.py          | 34 +++++++++++++-
 vllm/v1/worker/gpu_model_runner.py          | 49 ++++++++++++++-------
 7 files changed, 96 insertions(+), 72 deletions(-)

diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 0aaad02b5b840..3697cb9387a92 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -4,7 +4,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, ClassVar, Optional, Union
+from typing import ClassVar, Optional, Union
 
 import torch
 from flashinfer import (BatchDecodeWithPagedKVCacheWrapper,
@@ -21,17 +21,17 @@ from vllm.logger import init_logger
 from vllm.utils import cdiv, is_pin_memory_available
 from vllm.utils.flashinfer import use_trtllm_decode_attention
 from vllm.v1.attention.backends.flash_attn import use_cascade_attention
-from vllm.v1.attention.backends.utils import (
-    AttentionCGSupport, AttentionMetadataBuilder, CommonAttentionMetadata,
-    get_kv_cache_layout, get_per_layer_parameters,
-    infer_global_hyperparameters, reorder_batch_to_split_decodes_and_prefills,
-    split_decodes_and_prefills)
+# yapf conflicts with isort for this block
+# yapf: disable
+from vllm.v1.attention.backends.utils import (AttentionCGSupport,
+                                              AttentionMetadataBuilder,
+                                              CommonAttentionMetadata,
+                                              get_kv_cache_layout,
+                                              get_per_layer_parameters,
+                                              infer_global_hyperparameters,
+                                              split_decodes_and_prefills)
 from vllm.v1.kv_cache_interface import AttentionSpec
 
-if TYPE_CHECKING:
-    from vllm.v1.core.sched.output import SchedulerOutput
-    from vllm.v1.worker.gpu_input_batch import InputBatch
-
 FLASHINFER_WORKSPACE_BUFFER_SIZE = 256 * 1024 * 1024
 
 logger = init_logger(__name__)
@@ -179,6 +179,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
     attn_cudagraph_support: ClassVar[AttentionCGSupport] = \
         AttentionCGSupport.PURE_DECODE_ONLY
 
+    reorder_batch_threshold: ClassVar[int] = 1
+
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                  vllm_config: VllmConfig, device: torch.device):
         self.device = device
@@ -239,12 +241,6 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
                                                dtype=torch.int32,
                                                device=self.device)
 
-    def reorder_batch(self, input_batch: InputBatch,
-                      scheduler_output: SchedulerOutput) -> bool:
-        return reorder_batch_to_split_decodes_and_prefills(input_batch,
-                                                           scheduler_output,
-                                                           decode_threshold=1)
-
     def _get_workspace_buffer(self):
         if self._workspace_buffer is None:
             self._workspace_buffer = torch.empty(
diff --git a/vllm/v1/attention/backends/mamba_attn.py b/vllm/v1/attention/backends/mamba_attn.py
index 8b702e28d67c0..66a8d91db89c2 100644
--- a/vllm/v1/attention/backends/mamba_attn.py
+++ b/vllm/v1/attention/backends/mamba_attn.py
@@ -2,21 +2,17 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import math
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Optional
+from typing import ClassVar, Optional
 
 import torch
 
 from vllm.attention.backends.abstract import AttentionBackend
 from vllm.config import VllmConfig
-from vllm.v1.attention.backends.utils import (
-    AttentionMetadataBuilder, CommonAttentionMetadata,
-    reorder_batch_to_split_decodes_and_prefills, split_decodes_and_prefills)
+from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
+                                              CommonAttentionMetadata,
+                                              split_decodes_and_prefills)
 from vllm.v1.kv_cache_interface import AttentionSpec, MambaSpec
 
-if TYPE_CHECKING:
-    from vllm.v1.core.sched.output import SchedulerOutput
-    from vllm.v1.worker.gpu_input_batch import InputBatch
-
 
 def _query_start_loc_to_chunk_indices_offsets(query_start_loc: torch.Tensor,
                                               chunk_size: int,
@@ -87,6 +83,8 @@ class Mamba2AttentionMetadata:
 class Mamba2AttentionMetadataBuilder(
         AttentionMetadataBuilder[Mamba2AttentionMetadata]):
 
+    reorder_batch_threshold: ClassVar[int] = 1
+
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                  vllm_config: VllmConfig, device: torch.device):
         assert isinstance(kv_cache_spec, MambaSpec)
@@ -95,12 +93,6 @@ class Mamba2AttentionMetadataBuilder(
         assert self.chunk_size is not None, (
             "chunk_size needs to be set in the model config for Mamba2 models")
 
-    def reorder_batch(self, input_batch: "InputBatch",
-                      scheduler_output: "SchedulerOutput") -> bool:
-        return reorder_batch_to_split_decodes_and_prefills(input_batch,
-                                                           scheduler_output,
-                                                           decode_threshold=1)
-
     def build(self,
               common_prefix_len: int,
               common_attn_metadata: CommonAttentionMetadata,
diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py
index d112468f1c91d..badff67656c24 100755
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -190,7 +190,7 @@ return curr_o @ W_O
 import functools
 from abc import abstractmethod
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Generic, Optional, TypeVar, Union
+from typing import ClassVar, Generic, Optional, TypeVar, Union
 
 import torch
 
@@ -210,10 +210,11 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
 from vllm.platforms import current_platform
 from vllm.utils import cdiv, round_down
 from vllm.utils.flashinfer import has_nvidia_artifactory
-from vllm.v1.attention.backends.utils import (
-    AttentionMetadataBuilder, CommonAttentionMetadata,
-    get_per_layer_parameters, infer_global_hyperparameters,
-    reorder_batch_to_split_decodes_and_prefills, split_decodes_and_prefills)
+from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
+                                              CommonAttentionMetadata,
+                                              get_per_layer_parameters,
+                                              infer_global_hyperparameters,
+                                              split_decodes_and_prefills)
 from vllm.v1.kv_cache_interface import AttentionSpec
 
 try:
@@ -233,10 +234,6 @@ try:
 except ImportError:
     flashinfer_available = False
 
-if TYPE_CHECKING:
-    from vllm.v1.core.sched.output import SchedulerOutput
-    from vllm.v1.worker.gpu_input_batch import InputBatch
-
 logger = init_logger(__name__)
 
 CUDNN_WORKSPACE_SIZE = 12800
@@ -403,6 +400,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
     NOTE: Please read the comment at the top of the file before trying to
     understand this class
     """
+    reorder_batch_threshold: ClassVar[int] = 1
 
     def __init__(self,
                  kv_cache_spec: AttentionSpec,
@@ -559,12 +557,6 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
         prefill.prefill_main = self._fi_prefill_main
         prefill.prefill_chunks = self._fi_prefill_chunks
 
-    def reorder_batch(self, input_batch: "InputBatch",
-                      scheduler_output: "SchedulerOutput") -> bool:
-        return reorder_batch_to_split_decodes_and_prefills(input_batch,
-                                                           scheduler_output,
-                                                           decode_threshold=1)
-
     def _build_decode(self, block_table_tensor: torch.Tensor,
                       seq_lens: torch.Tensor):
         return MLACommonDecodeMetadata(
diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index dd10b7f02730a..abe05174507ff 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -251,9 +251,6 @@ class AiterFlashAttentionMetadataBuilder(
         self.aot_sliding_window: Optional[tuple[int, int]] = None
         self.total_tokens: int = 0
 
-    def reorder_batch(self, input_batch, scheduler_output) -> bool:
-        return False
-
     def build_for_cudagraph_capture(
             self, common_attn_metadata: CommonAttentionMetadata):
         self.total_tokens = self.model_config.max_model_len \
diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index 0f041573e9d20..6defd211f4cfa 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -167,6 +167,10 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
     # Does this backend/builder support CUDA Graphs for attention.
     attn_cudagraph_support: ClassVar[AttentionCGSupport] = \
         AttentionCGSupport.NEVER
+    # Does this backend/builder reorder the batch?
+    # If not, set this to None. Otherwise set it to the query
+    # length that will be pulled into the front of the batch.
+    reorder_batch_threshold: ClassVar[Optional[int]] = None
 
     @abstractmethod
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
@@ -221,14 +225,6 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
     ) -> bool:
         return False
 
-    def reorder_batch(self, input_batch: "InputBatch",
-                      scheduler_output: "SchedulerOutput") -> bool:
-        """
-        This method can reorder the batch if desired by the backend.
-        :return: Has the batch been reordered (default False).
-        """
-        return False
-
 
 @functools.lru_cache
 def get_kv_cache_layout():
diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py
index 6b2b50a57e1f8..d8f3e0d89a960 100644
--- a/vllm/v1/worker/cpu_model_runner.py
+++ b/vllm/v1/worker/cpu_model_runner.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from contextlib import contextmanager
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
 import torch.nn as nn
@@ -9,8 +9,12 @@ import torch.nn as nn
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader import get_model
+from vllm.v1.attention.backends.cpu_attn import TorchSDPAMetadataBuilderV1
 from vllm.v1.worker.gpu_model_runner import GPUModelRunner
 
+if TYPE_CHECKING:
+    from vllm.v1.core.sched.output import SchedulerOutput
+
 logger = init_logger(__name__)
 
 
@@ -27,6 +31,34 @@ class CPUModelRunner(GPUModelRunner):
 
         self._postprocess_tenosrs()
 
+    def _may_reorder_batch(self, scheduler_output: "SchedulerOutput") -> None:
+        """
+        Let the CPU attention metadata builder reorder the input batch.
+
+        Args:
+            scheduler_output: The scheduler output.
+
+        Raises:
+            ValueError: If more than one KV cache group is configured.
+        """
+        # Attention free models have zero kv_cache_groups, however models
+        # like Mamba are also attention free but use the kv_cache for
+        # keeping their internal state. This is why we check the number
+        # of kv_cache groups instead of solely checking
+        # for self.model_config.is_attention_free.
+        if len(self.kv_cache_config.kv_cache_groups) == 0:
+            return
+
+        if len(self.kv_cache_config.kv_cache_groups) > 1:
+            raise ValueError("Multiple KVCacheGroups is not "
+                             "currently supported with CPU model runner.")
+
+        assert type(
+            self.attn_metadata_builders[0]) is TorchSDPAMetadataBuilderV1
+
+        self.attn_metadata_builders[0].reorder_batch(self.input_batch,
+                                                     scheduler_output)
+
     def _postprocess_tenosrs(self) -> None:
         # Note: replace device tensors with cpu tensors
         def replace_tensor(obj: Any, cpu_attr_name: str,
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index d5a5799efb47c..42cef6c5733d2 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -49,7 +49,8 @@ from vllm.v1.attention.backends.mamba_selectors import get_mamba_attn_backend
 from vllm.v1.attention.backends.utils import (
     AttentionCGSupport, AttentionMetadataBuilder, CommonAttentionMetadata,
     make_kv_sharing_fast_prefill_attention_metadata,
-    make_local_attention_virtual_batches)
+    make_local_attention_virtual_batches,
+    reorder_batch_to_split_decodes_and_prefills)
 from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
 from vllm.v1.kv_cache_interface import (AttentionSpec,
                                         ChunkedLocalAttentionSpec,
@@ -329,6 +330,8 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             self.kv_sharing_fast_prefill_logits_indices = torch.zeros(
                 self.max_num_tokens, dtype=torch.int32, device=self.device)
 
+        self.reorder_batch_threshold: Optional[int] = None
+
     def _may_reorder_batch(self, scheduler_output: "SchedulerOutput") -> None:
         """
         Update the order of requests in the batch based on the attention
@@ -347,20 +350,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         if len(self.kv_cache_config.kv_cache_groups) == 0:
             return
 
-        self.attn_metadata_builders[0].reorder_batch(self.input_batch,
-                                                     scheduler_output)
-
-        # For models with multiple KV cache groups, the groups should agree on
-        # the same order of requests. We ensure this by only allowing the first
-        # group to reorder the batch and asserting that all other groups do not
-        # reorder the batch.
-        # TODO(tdoublep): make this more flexible so that any group can
-        # re-order the batch (not only the first).
-        # TODO(tdoublep): verify this during engine init instead of at runtime
-        for i in range(1, len(self.kv_cache_config.kv_cache_groups)):
-            batch_reordered = self.attn_metadata_builders[i].reorder_batch(
-                self.input_batch, scheduler_output)
-            assert not batch_reordered
+        if self.reorder_batch_threshold is not None:
+            reorder_batch_to_split_decodes_and_prefills(
+                self.input_batch,
+                scheduler_output,
+                decode_threshold=self.reorder_batch_threshold)
 
     # Note: used for model runner override.
     def _init_device_properties(self) -> None:
@@ -2654,6 +2648,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             self.attn_backends.append(attn_backend_i)
             self.attn_metadata_builders.append(attn_metadata_builder_i)
 
+        # Calculate reorder batch threshold (if needed)
+        self.calculate_reorder_batch_threshold()
+
         if len(self.attn_backends) > 0:
             return
 
@@ -2688,6 +2685,28 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             self.attn_metadata_builders.append(attn_metadata_builder)
             self.is_encoder_only_model = True
 
+    def calculate_reorder_batch_threshold(self) -> None:
+        """
+        Derive the runner-wide batch reorder threshold from the attention
+        metadata builders and verify that the builders agree on it.
+
+        Builders whose threshold is None are skipped. A non-None value is
+        stored in `self.reorder_batch_threshold` when none is set yet;
+        a builder with a different non-None value raises ValueError.
+        """
+        for builder in self.attn_metadata_builders:
+            threshold = builder.reorder_batch_threshold
+            if threshold is None:
+                continue
+            if self.reorder_batch_threshold is None:
+                self.reorder_batch_threshold = threshold
+            elif threshold != self.reorder_batch_threshold:
+                raise ValueError(
+                    f"Attention backend reorders decodes with "
+                    f"threshold {threshold} but other "
+                    f"backend uses threshold "
+                    f"{self.reorder_batch_threshold}")
+
     def may_reinitialize_input_batch(self,
                                      kv_cache_config: KVCacheConfig) -> None:
         """

From 73e1b9b1d4cd478eb9d715b637683c000207de67 Mon Sep 17 00:00:00 2001
From: Yan Ma <yan.ma@intel.com>
Date: Sat, 2 Aug 2025 22:49:08 +0800
Subject: [PATCH 165/224] [xpu]support moe models on XPU platform (#21643)

Signed-off-by: yan <yan.ma@intel.com>
Signed-off-by: Yan Ma <yan.ma@intel.com>
---
 vllm/model_executor/layers/fused_moe/layer.py | 47 ++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index e16fc13c945cf..c2039adad99c3 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -327,7 +327,14 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
             layer.w13_weight.data = shuffled_w13
             layer.w2_weight.data = shuffled_w2
 
-        if current_platform.is_cpu():
+        if current_platform.is_xpu():
+            import intel_extension_for_pytorch as ipex
+            layer.ipex_fusion = ipex.llm.modules.GatedMLPMOE(
+                layer.w13_weight,
+                layer.w2_weight,
+                use_prepack=True,
+            )
+        elif current_platform.is_cpu():
             if current_platform.get_cpu_architecture() == CpuArchEnum.X86:
                 from vllm.model_executor.layers.fused_moe import cpu_fused_moe
                 dtype = layer.w13_weight.dtype
@@ -509,6 +516,44 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
             activation,
         )
 
+    def forward_xpu(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        use_grouped_topk: bool,
+        top_k: int,
+        router_logits: torch.Tensor,
+        renormalize: bool,
+        topk_group: Optional[int] = None,
+        num_expert_group: Optional[int] = None,
+        global_num_experts: int = -1,
+        expert_map: Optional[torch.Tensor] = None,
+        custom_routing_function: Optional[Callable] = None,
+        scoring_func: str = "softmax",
+        e_score_correction_bias: Optional[torch.Tensor] = None,
+        apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
+        enable_eplb: bool = False,
+        expert_load_view: Optional[torch.Tensor] = None,
+        logical_to_physical_map: Optional[torch.Tensor] = None,
+        logical_replica_count: Optional[torch.Tensor] = None,
+    ):
+        """Forward the inputs to `layer.ipex_fusion` and return its result.
+
+        Raises NotImplementedError if any EPLB argument is supplied.
+        """
+        eplb_tensors = (expert_load_view, logical_to_physical_map,
+                        logical_replica_count)
+        if enable_eplb is not False or any(tensor is not None
+                                           for tensor in eplb_tensors):
+            raise NotImplementedError("Expert load balancing is not supported "
+                                      "for XPU.")
+        # Custom routing is rejected here (the check vanishes under python -O).
+        assert custom_routing_function is None
+        routing_args = (use_grouped_topk, top_k, router_logits, renormalize,
+                        topk_group, num_expert_group)
+        return layer.ipex_fusion(x, *routing_args)
+
     def forward_tpu(
         self,
         layer: torch.nn.Module,

From 554df8a6a2ed9007086f64768803ae4c780127bd Mon Sep 17 00:00:00 2001
From: Xiao <xiszishu@gmail.com>
Date: Sat, 2 Aug 2025 09:03:30 -0700
Subject: [PATCH 166/224] Revert "[compile][startup] Disable C++ compilation of
 symbolic shapes" (#22122)

Signed-off-by: Xiao Liu <xiszishu@gmail.com>
---
 vllm/compilation/decorators.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
index 0d2c432497c40..1370862d580a5 100644
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -267,15 +267,8 @@ def _support_torch_compile(
                     code.co_filename)
                 return inline_call(parent, func, args, kwargs)
 
-            # Disable the C++ compilation of symbolic shape guards. C++-fication
-            # of symbolic shape guards can improve guard overhead. But, since
-            # vllm skip guards anyways, setting this flag to False can improve
-            # compile time.
-            with torch._dynamo.config.patch("enable_cpp_symbolic_shape_guards",
-                                            False), patch.object(
-                                                InliningInstructionTranslator,
-                                                'inline_call',
-                                                patched_inline_call):
+            with patch.object(InliningInstructionTranslator, 'inline_call',
+                              patched_inline_call):
                 output = self.compiled_callable(*args, **kwargs)
             return output
 

From 2ff46b882694cc3eb6cde48f6b9251ccbc5fdb04 Mon Sep 17 00:00:00 2001
From: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
Date: Sat, 2 Aug 2025 19:42:00 -0700
Subject: [PATCH 167/224] [Misc] Bump ray to 2.48.0 (#22123)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
---
 requirements/cuda.txt               |  2 +-
 requirements/nightly_torch_test.txt |  2 +-
 requirements/test.in                |  2 +-
 requirements/test.txt               | 22 +++++++++++++++-------
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/requirements/cuda.txt b/requirements/cuda.txt
index 75008dc20df48..fb30e493f80b3 100644
--- a/requirements/cuda.txt
+++ b/requirements/cuda.txt
@@ -5,7 +5,7 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req
 numba == 0.61.2; python_version > '3.9'
 
 # Dependencies for NVIDIA GPUs
-ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
+ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
 torch==2.7.1
 torchaudio==2.7.1
 # These must be updated alongside torch
diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt
index 0a72ddefda79c..7ae5e6f2f409a 100644
--- a/requirements/nightly_torch_test.txt
+++ b/requirements/nightly_torch_test.txt
@@ -16,7 +16,7 @@ librosa # required for audio tests
 vocos # required for minicpmo_26 test
 peft
 pqdm
-ray[cgraph,default]>=2.43.0, !=2.44.* # Ray Compiled Graph, required by pipeline parallelism tests
+ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
 sentence-transformers # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
diff --git a/requirements/test.in b/requirements/test.in
index 3c5e3c0204bfb..9ecaaae92727f 100644
--- a/requirements/test.in
+++ b/requirements/test.in
@@ -17,7 +17,7 @@ vector_quantize_pytorch # required for minicpmo_26 test
 vocos # required for minicpmo_26 test
 peft>=0.15.0 # required for phi-4-mm test
 pqdm
-ray[cgraph,default]>=2.43.0, !=2.44.* # Ray Compiled Graph, required by pipeline parallelism tests
+ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
 sentence-transformers # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
diff --git a/requirements/test.txt b/requirements/test.txt
index d45048aae5809..691420df87c48 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -22,9 +22,7 @@ aiohttp==3.10.11
 aiohttp-cors==0.8.1
     # via ray
 aiosignal==1.3.1
-    # via
-    #   aiohttp
-    #   ray
+    # via aiohttp
 albucore==0.0.16
     # via terratorch
 albumentations==1.4.6
@@ -226,7 +224,6 @@ frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
-    #   ray
 fsspec==2024.9.0
     # via
     #   datasets
@@ -603,10 +600,18 @@ opencv-python-headless==4.11.0.86
 opentelemetry-api==1.35.0
     # via
     #   mlflow-skinny
+    #   opentelemetry-exporter-prometheus
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
+opentelemetry-exporter-prometheus==0.56b0
+    # via ray
+opentelemetry-proto==1.36.0
+    # via ray
 opentelemetry-sdk==1.35.0
-    # via mlflow-skinny
+    # via
+    #   mlflow-skinny
+    #   opentelemetry-exporter-prometheus
+    #   ray
 opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
 packaging==24.2
@@ -697,7 +702,9 @@ pqdm==0.2.0
 pretrainedmodels==0.7.4
     # via segmentation-models-pytorch
 prometheus-client==0.22.0
-    # via ray
+    # via
+    #   opentelemetry-exporter-prometheus
+    #   ray
 propcache==0.2.0
     # via yarl
 proto-plus==1.26.1
@@ -707,6 +714,7 @@ protobuf==5.28.3
     #   google-api-core
     #   googleapis-common-protos
     #   mlflow-skinny
+    #   opentelemetry-proto
     #   proto-plus
     #   ray
     #   tensorboardx
@@ -854,7 +862,7 @@ rasterio==1.4.3
     #   rioxarray
     #   terratorch
     #   torchgeo
-ray==2.43.0
+ray==2.48.0
     # via -r requirements/test.in
 redis==5.2.0
     # via tensorizer

From 337eb23bcca6257a75e2c8677c4698bbff9f4a81 Mon Sep 17 00:00:00 2001
From: jiahanc <173873397+jiahanc@users.noreply.github.com>
Date: Sun, 3 Aug 2025 00:50:34 -0700
Subject: [PATCH 168/224] [Fix] Fix llama4 modelopt weight loading error
 (#22107)

Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
---
 vllm/model_executor/models/mllama4.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py
index 924f10d82b381..e73dc0c2be82e 100644
--- a/vllm/model_executor/models/mllama4.py
+++ b/vllm/model_executor/models/mllama4.py
@@ -906,11 +906,13 @@ class Llama4ForConditionalGeneration(nn.Module, SupportsMultiModal,
     def _rename_weight_for_modelopt_checkpoint(self, name: str) -> str:
         """Rename weights from ModelOpt llama4 fp8 checkpoints to vLLM
         format."""
-        if name.startswith("model."):
+        if name.startswith("model.") or name.startswith(
+                "language_model.model."):
+            renamed = name.replace("model.", "language_model.model.",
+                                   1) if name.startswith("model.") else name
             # Handle expert scale parameters with flat naming
             if "feed_forward.experts." in name and ("_input_scale" in name or
                                                     "_weight_scale" in name):
-                renamed = name.replace("model.", "language_model.model.", 1)
                 # Map checkpoint naming to vLLM's expected naming
                 if "down_proj_input_scale" in renamed:
                     return renamed.replace("down_proj_input_scale",
@@ -929,7 +931,6 @@ class Llama4ForConditionalGeneration(nn.Module, SupportsMultiModal,
             # Handle attention scale parameters
             elif "self_attn." in name and (".k_scale" in name
                                            or ".v_scale" in name):
-                renamed = name.replace("model.", "language_model.model.", 1)
                 if ".k_proj.k_scale" in renamed:
                     return renamed.replace(".k_proj.k_scale", ".attn.k_scale")
                 elif ".v_proj.v_scale" in renamed:
@@ -937,7 +938,7 @@ class Llama4ForConditionalGeneration(nn.Module, SupportsMultiModal,
                 return renamed
 
             # Standard model.* to language_model.model.* renaming
-            return name.replace("model.", "language_model.model.", 1)
+            return renamed
 
         elif name.startswith("lm_head.weight"):
             return name.replace("lm_head.weight",

From 3dddbf1f2545740659a9cb975b7becca2c3dc0e6 Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Sun, 3 Aug 2025 15:52:14 +0800
Subject: [PATCH 169/224] [Misc] Add tensor schema test coverage for multimodal
 models (#21754)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: Isotr0py <2037008807@qq.com>
---
 .buildkite/test-pipeline.yaml                 |   3 +-
 tests/conftest.py                             |   2 +-
 tests/models/multimodal/test_tensor_schema.py | 199 ++++++++++++++++++
 tests/models/registry.py                      |   7 +-
 vllm/model_executor/models/deepseek_vl2.py    |   3 +-
 vllm/model_executor/models/keye.py            |  17 +-
 .../processors/deepseek_vl2.py                |   6 +-
 7 files changed, 222 insertions(+), 15 deletions(-)
 create mode 100644 tests/models/multimodal/test_tensor_schema.py

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index cc1223d4c4653..88e1197d703a4 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -581,7 +581,8 @@ steps:
     - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
     - pip freeze | grep -E 'torch'
     - pytest -v -s models/multimodal/processing
-    - pytest -v -s --ignore models/multimodal/generation/test_whisper.py models/multimodal -m core_model
+    - pytest -v -s --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/test_tensor_schema.py models/multimodal -m core_model
+    - pytest -v -s models/multimodal/test_tensor_schema.py -m core_model  # Needs mp_method="spawn"
     - cd .. && pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model  # Otherwise, mp_method="spawn" doesn't work
 
 - label: Multi-Modal Models Test (Extended) 1
diff --git a/tests/conftest.py b/tests/conftest.py
index 67f0e7424038c..3f3790cab8d35 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -775,7 +775,7 @@ class VllmRunner:
         tokenizer_mode: str = "auto",
         trust_remote_code: bool = True,
         seed: Optional[int] = 0,
-        max_model_len: int = 1024,
+        max_model_len: Optional[int] = 1024,
         dtype: str = "auto",
         disable_log_stats: bool = True,
         tensor_parallel_size: int = 1,
diff --git a/tests/models/multimodal/test_tensor_schema.py b/tests/models/multimodal/test_tensor_schema.py
new file mode 100644
index 0000000000000..bdc62b1d2682d
--- /dev/null
+++ b/tests/models/multimodal/test_tensor_schema.py
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from functools import partial
+from typing import Any
+from unittest.mock import patch
+
+import pytest
+from transformers import PretrainedConfig
+
+from vllm.config import ModelConfig
+from vllm.engine.llm_engine import LLMEngine as V0LLMEngine
+from vllm.inputs import InputProcessingContext
+from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargs
+from vllm.multimodal.processing import BaseMultiModalProcessor
+from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.utils import GiB_bytes, set_default_torch_num_threads
+from vllm.v1.core.kv_cache_utils import get_kv_cache_config
+from vllm.v1.engine.core import EngineCore as V1EngineCore
+
+from ...conftest import VllmRunner
+from ..registry import _MULTIMODAL_EXAMPLE_MODELS, HF_EXAMPLE_MODELS
+
+ARCH_TO_SKIP = {
+    "MolmoForCausalLM": "incompatible requirements",
+    "MiniMaxVL01ForConditionalGeneration": "broken model",
+}
+
+
+def create_batched_mm_kwargs(
+    model_config: ModelConfig,
+    processor: BaseMultiModalProcessor,
+) -> MultiModalKwargs:
+    processing_info = processor.info
+    dummy_inputs = processor.dummy_inputs
+    supported_mm_limits = processing_info.get_supported_mm_limits()
+    mm_counts = {
+        modality: 3 if limit is None else limit
+        for modality, limit in supported_mm_limits.items()
+    }
+    processor_inputs = dummy_inputs.get_dummy_processor_inputs(
+        seq_len=model_config.max_model_len,
+        mm_counts=mm_counts,
+    )
+    mm_kwargs = processor.apply(
+        prompt=processor_inputs.prompt,
+        mm_data=processor_inputs.mm_data,
+        hf_processor_mm_kwargs=processor_inputs.hf_processor_mm_kwargs,
+        tokenization_kwargs=processor_inputs.tokenization_kwargs,
+    )["mm_kwargs"]
+    mm_kwargs = MultiModalKwargs.batch([mm_kwargs])
+    return mm_kwargs
+
+
+# Avoid OOM and reduce initialization time by only using 1 layer
+def hf_overrides(hf_config: PretrainedConfig,
+                 exist_overrides: dict[str, Any]) -> PretrainedConfig:
+    hf_config.update(exist_overrides)
+    text_config = hf_config.get_text_config()
+    # Ensure at least 2 experts per group
+    # Since `grouped_topk` assumes top-2
+    n_group = getattr(text_config, 'n_group', None)
+    num_experts = n_group * 2 if n_group is not None else 2
+    # NOTE: every model (including Gemma-3n) is reduced to a single layer
+    # below; no extra layers are kept for kv_shared_layer coverage
+    text_config.update({
+        "num_layers": 1,
+        "num_hidden_layers": 1,
+        "num_experts": num_experts,
+        "num_experts_per_tok": 2,
+        "num_local_experts": num_experts,
+        # Otherwise there will not be any expert layers
+        "first_k_dense_replace": 0,
+        # To avoid OOM on DeepSeek-V3
+        "n_routed_experts": num_experts,
+        # For Gemma-3n
+        "num_kv_shared_layers": 1,
+    })
+    if hasattr(hf_config, "vision_config"):
+        hf_config.vision_config.update({
+            "num_layers": 1,
+            "num_hidden_layers": 1,
+        })
+    # e.g.: ibm-granite/granite-speech-3.3-2b
+    if hasattr(hf_config, "encoder_config"):
+        hf_config.encoder_config.update({
+            "num_layers": 1,
+            "num_hidden_layers": 1,
+        })
+    # e.g.: Qwen/Qwen2-Audio-7B-Instruct
+    if hasattr(hf_config, "audio_config"):
+        hf_config.audio_config.update({
+            "num_layers": 1,
+            "num_hidden_layers": 1,
+            "encoder_layers": 1,
+        })
+    return hf_config
+
+
+@pytest.mark.core_model
+@pytest.mark.parametrize("model_arch", list(_MULTIMODAL_EXAMPLE_MODELS.keys()))
+def test_model_tensor_schema(model_arch: str, vllm_runner: type[VllmRunner],
+                             monkeypatch):
+    if model_arch in ARCH_TO_SKIP:
+        pytest.skip(f"Skipping {model_arch} due to {ARCH_TO_SKIP[model_arch]}")
+
+    model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
+    model_info.check_available_online(on_fail="skip")
+
+    model_id = model_info.default
+
+    hf_overrides_fn = partial(hf_overrides,
+                              exist_overrides=model_info.hf_overrides)
+
+    model_config = ModelConfig(
+        model_id,
+        tokenizer=model_info.tokenizer or model_id,
+        tokenizer_mode=model_info.tokenizer_mode,
+        revision=model_info.revision,
+        trust_remote_code=model_info.trust_remote_code,
+        hf_overrides=model_info.hf_overrides,
+    )
+    model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
+    factories = MULTIMODAL_REGISTRY._processor_factories[model_cls]
+
+    if not any(
+            hasattr(model_cls, f"_parse_and_validate_{m}_input")
+            for m in ["image", "video", "audio"]):
+        pytest.skip(f"{model_arch} does not support tensor schema validation.")
+
+    ctx = InputProcessingContext(
+        model_config,
+        tokenizer=cached_tokenizer_from_config(model_config),
+    )
+    processing_info = factories.info(ctx)
+    supported_mm_limits = processing_info.get_supported_mm_limits()
+    limit_mm_per_prompt = {
+        modality: 3 if limit is None else limit
+        for modality, limit in supported_mm_limits.items()
+    }
+
+    # Avoid calling model.forward()
+    def _initialize_kv_caches_v0(self) -> None:
+        self.cache_config.num_gpu_blocks = 0
+        self.cache_config.num_cpu_blocks = 0
+
+    def _initialize_kv_caches_v1(self, vllm_config):
+        kv_cache_specs = self.model_executor.get_kv_cache_specs()
+        scheduler_kv_cache_config = get_kv_cache_config(
+            vllm_config,
+            kv_cache_specs[0],
+            10 * GiB_bytes,
+        )
+
+        # gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config
+        return 1, 0, scheduler_kv_cache_config
+
+    with (patch.object(V0LLMEngine, "_initialize_kv_caches",
+                       _initialize_kv_caches_v0),
+          patch.object(V1EngineCore, "_initialize_kv_caches",
+                       _initialize_kv_caches_v1), monkeypatch.context() as m):
+        m.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+        if model_info.v0_only:
+            m.setenv("VLLM_USE_V1", "0")
+
+        with (
+                set_default_torch_num_threads(1),
+                vllm_runner(
+                    model_id,
+                    tokenizer_name=model_info.tokenizer,
+                    tokenizer_mode=model_info.tokenizer_mode,
+                    revision=model_info.revision,
+                    trust_remote_code=model_info.trust_remote_code,
+                    max_model_len=model_info.max_model_len,
+                    load_format="dummy",
+                    hf_overrides=hf_overrides_fn,
+                    limit_mm_per_prompt=limit_mm_per_prompt,
+                    enforce_eager=True,
+                ) as vllm_model,
+        ):
+            model_config = vllm_model.llm.llm_engine.model_config
+            llm_engine = vllm_model.llm.llm_engine
+
+            if hasattr(llm_engine, "processor"):
+                # v1 processor
+                mm_registry = llm_engine.processor.mm_registry
+            else:
+                # v0 input_preprocessor
+                mm_registry = llm_engine.input_preprocessor.mm_registry
+
+            processor = mm_registry.create_processor(model_config)
+            mm_kwargs = create_batched_mm_kwargs(model_config, processor)
+
+            def validate_model_input(model):
+                for modality in ("audio", "image", "video"):
+                    method_name = f"_parse_and_validate_{modality}_input"
+                    if hasattr(model, method_name):
+                        getattr(model, method_name)(**mm_kwargs)
+
+            vllm_model.apply_model(validate_model_input)
diff --git a/tests/models/registry.py b/tests/models/registry.py
index d88d77cddcca5..8fc870cf85642 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -383,6 +383,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
                                           is_available_online=False),   # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
+                                      trust_remote_code=True,
                                       extras={"2b": "h2oai/h2ovl-mississippi-2b"},  # noqa: E501
                                       max_transformers_version="4.48",  # noqa: E501
                                       transformers_version_reason="HF model is not compatible."),  # noqa: E501
@@ -432,6 +433,9 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                               trust_remote_code=True),
     "Llama_Nemotron_Nano_VL" : _HfExamplesInfo("nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1", # noqa: E501
                                                      trust_remote_code=True),
+    "Ovis": _HfExamplesInfo("AIDC-AI/Ovis2-1B", trust_remote_code=True,
+                            extras={"1.6-llama": "AIDC-AI/Ovis1.6-Llama3.2-3B",
+                                    "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B"}),  # noqa: E501
     "PaliGemmaForConditionalGeneration": _HfExamplesInfo("google/paligemma-3b-mix-224",  # noqa: E501
                                                          extras={"v2": "google/paligemma2-3b-ft-docci-448"}),  # noqa: E501
     "Phi3VForCausalLM": _HfExamplesInfo("microsoft/Phi-3-vision-128k-instruct",
@@ -439,9 +443,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                                         max_transformers_version="4.48",
                                         transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
                                         extras={"phi3.5": "microsoft/Phi-3.5-vision-instruct"}),  # noqa: E501
-    "Ovis": _HfExamplesInfo("AIDC-AI/Ovis2-1B", trust_remote_code=True,
-                            extras={"1.6-llama": "AIDC-AI/Ovis1.6-Llama3.2-3B",
-                                    "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B"}),  # noqa: E501
     "Phi4MMForCausalLM": _HfExamplesInfo("microsoft/Phi-4-multimodal-instruct",
                                         trust_remote_code=True),
     "Phi4MultimodalForCausalLM": _HfExamplesInfo("microsoft/Phi-4-multimodal-instruct",  # noqa: E501
diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py
index 531018625478b..e0acca75d9dd6 100644
--- a/vllm/model_executor/models/deepseek_vl2.py
+++ b/vllm/model_executor/models/deepseek_vl2.py
@@ -51,13 +51,14 @@ class DeepseekVL2ImagePixelInputs(TensorSchema):
     """
     Dimensions:
         - bn: Batch size * number of images
+        - p: Number of patches
         - c: Number of channels (3)
         - h: Height of each image
         - w: Width of each image
     """
     type: Literal["pixel_values"]
     data: Annotated[Union[torch.Tensor, list[torch.Tensor]],
-                    TensorShape("bn", 3, "h", "w")]
+                    TensorShape("bn", "p", 3, "h", "w", dynamic_dims={"p"})]
     images_spatial_crop: Annotated[torch.Tensor, TensorShape("bn", 2)]
 
 
diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py
index 4d8aa8de0f0b1..40c66c2268507 100644
--- a/vllm/model_executor/models/keye.py
+++ b/vllm/model_executor/models/keye.py
@@ -104,13 +104,16 @@ def smart_resize(
 class KeyeImagePixelInputs(TensorSchema):
     """
     Dimensions:
+        - b: Batch size
         - np: Number of patches
-        - cps: Number of channels * patch_size * patch_size
+        - c: Number of channels
+        - ps: Patch size
         - ni: Number of images
         - g: Grid dimensions (3 for t, h, w)
     """
     type: Literal["pixel_values"]
-    pixel_values: Annotated[torch.Tensor, TensorShape("np", "cps")]
+    pixel_values: Annotated[torch.Tensor,
+                            TensorShape("b", "np", 3, "ps", "ps")]
     image_grid_thw: Annotated[torch.Tensor, TensorShape("ni", 3)]
 
 
@@ -134,14 +137,16 @@ KeyeImageInputs = Union[KeyeImagePixelInputs, KeyeImageEmbeddingInputs]
 class KeyeVideoPixelInputs(TensorSchema):
     """
     Dimensions:
+        - b: Batch size
         - np: Number of patches
-        - ctps: Number of channels * temporal_patch_size * patch_size * 
-          patch_size
-        - nv: Number of videos
+        - c: Number of channels
+        - ps: Patch size
+        - nv: Number of videos
         - g: Grid dimensions (3 for t, h, w)
     """
     type: Literal["pixel_values_videos"]
-    pixel_values_videos: Annotated[torch.Tensor, TensorShape("np", "ctps")]
+    pixel_values_videos: Annotated[torch.Tensor,
+                                   TensorShape("b", "np", 3, "ps", "ps")]
     video_grid_thw: Annotated[torch.Tensor, TensorShape("nv", 3)]
 
 
diff --git a/vllm/transformers_utils/processors/deepseek_vl2.py b/vllm/transformers_utils/processors/deepseek_vl2.py
index b4669d12fa213..5896bde312657 100644
--- a/vllm/transformers_utils/processors/deepseek_vl2.py
+++ b/vllm/transformers_utils/processors/deepseek_vl2.py
@@ -256,7 +256,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
     def __call__(
         self,
         *,
-        prompt: str,
+        text: str,
         images: list[Image.Image],
         inference_mode: bool = True,
         **kwargs,
@@ -264,7 +264,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
         """
 
         Args:
-            prompt (str): the formatted prompt;
+            text (str): the formatted prompt;
             images (list[ImageType]): the list of images;
             inference_mode (bool): if True, then remove the last eos token;
             **kwargs:
@@ -278,7 +278,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
         """
 
         prepare = self.process_one(
-            prompt=prompt,
+            prompt=text,
             images=images,
             inference_mode=inference_mode,
         )

From 3f36c325fa6cd086ab3dea40866f8ab0d7f8ef6e Mon Sep 17 00:00:00 2001
From: "Ye (Charlotte) Qi" <yeq@meta.com>
Date: Sun, 3 Aug 2025 00:52:38 -0700
Subject: [PATCH 170/224] [Benchmark] Support ready check timeout in `vllm
 bench serve` (#21696)

Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>
Co-authored-by: Roger Wang <hey@rogerw.me>
---
 vllm/benchmarks/latency.py                    |  4 +-
 vllm/benchmarks/lib/__init__.py               |  3 +
 .../{ => lib}/endpoint_request_func.py        |  0
 vllm/benchmarks/lib/ready_checker.py          | 70 +++++++++++++++++++
 vllm/benchmarks/{ => lib}/utils.py            |  0
 vllm/benchmarks/serve.py                      | 24 +++++--
 vllm/benchmarks/throughput.py                 |  4 +-
 7 files changed, 94 insertions(+), 11 deletions(-)
 create mode 100644 vllm/benchmarks/lib/__init__.py
 rename vllm/benchmarks/{ => lib}/endpoint_request_func.py (100%)
 create mode 100644 vllm/benchmarks/lib/ready_checker.py
 rename vllm/benchmarks/{ => lib}/utils.py (100%)

diff --git a/vllm/benchmarks/latency.py b/vllm/benchmarks/latency.py
index 5c6124db80b4f..cebdf56c45b1b 100644
--- a/vllm/benchmarks/latency.py
+++ b/vllm/benchmarks/latency.py
@@ -14,8 +14,8 @@ from tqdm import tqdm
 
 import vllm.envs as envs
 from vllm import LLM, SamplingParams
-from vllm.benchmarks.utils import (convert_to_pytorch_benchmark_format,
-                                   write_to_json)
+from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
+                                       write_to_json)
 from vllm.engine.arg_utils import EngineArgs
 from vllm.inputs import PromptType
 from vllm.sampling_params import BeamSearchParams
diff --git a/vllm/benchmarks/lib/__init__.py b/vllm/benchmarks/lib/__init__.py
new file mode 100644
index 0000000000000..005e87af61949
--- /dev/null
+++ b/vllm/benchmarks/lib/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Benchmark library utilities."""
diff --git a/vllm/benchmarks/endpoint_request_func.py b/vllm/benchmarks/lib/endpoint_request_func.py
similarity index 100%
rename from vllm/benchmarks/endpoint_request_func.py
rename to vllm/benchmarks/lib/endpoint_request_func.py
diff --git a/vllm/benchmarks/lib/ready_checker.py b/vllm/benchmarks/lib/ready_checker.py
new file mode 100644
index 0000000000000..a663f85b629d2
--- /dev/null
+++ b/vllm/benchmarks/lib/ready_checker.py
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Utilities for checking endpoint readiness."""
+
+import asyncio
+import time
+
+import aiohttp
+from tqdm.asyncio import tqdm
+
+from .endpoint_request_func import RequestFuncInput, RequestFuncOutput
+
+
+async def wait_for_endpoint(
+    request_func,
+    test_input: RequestFuncInput,
+    timeout_seconds: int = 600,
+    retry_interval: int = 5,
+) -> RequestFuncOutput:
+    """
+    Wait for an endpoint to become available before starting benchmarks.
+    
+    Args:
+        request_func: The async request function to call
+        test_input: The RequestFuncInput to test with
+        timeout_seconds: Maximum time to wait in seconds (default: 10 minutes)
+        retry_interval: Time between retries in seconds (default: 5 seconds)
+        
+    Returns:
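+        RequestFuncOutput: The successful response, or the last
+        unsuccessful output if the timeout elapses first. This function
+        does not raise on timeout; the caller must check ``success``
+        (`benchmark` in serve.py raises ValueError when it is False).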
+    """
+    deadline = time.perf_counter() + timeout_seconds
+    output = RequestFuncOutput(success=False)
+    print(f"Waiting for endpoint to become up in {timeout_seconds} seconds")
+    
+    with tqdm(
+        total=timeout_seconds, 
+        bar_format="{desc} |{bar}| {elapsed} elapsed, {remaining} remaining",
+        unit="s",
+    ) as pbar:
+
+        while True:            
+            # update progress bar
+            remaining = deadline - time.perf_counter()
+            elapsed = timeout_seconds - remaining
+            update_amount = min(elapsed - pbar.n, timeout_seconds - pbar.n)
+            pbar.update(update_amount)
+            pbar.refresh()
+            if remaining <= 0:
+                pbar.close()
+                break
+
+            # ping the endpoint using request_func
+            try:
+                output = await request_func(request_func_input=test_input)
+                if output.success:
+                    pbar.close()
+                    return output
+            except aiohttp.ClientConnectorError:
+                pass
+            
+            # retry after a delay
+            sleep_duration = min(retry_interval, remaining)
+            if sleep_duration > 0:
+                await asyncio.sleep(sleep_duration)
+    
+    return output
diff --git a/vllm/benchmarks/utils.py b/vllm/benchmarks/lib/utils.py
similarity index 100%
rename from vllm/benchmarks/utils.py
rename to vllm/benchmarks/lib/utils.py
diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py
index bd2b1e5990c83..45798547ac719 100644
--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -34,12 +34,12 @@ from transformers import PreTrainedTokenizerBase
 
 from vllm.benchmarks.datasets import (SampleRequest, add_dataset_parser,
                                       get_samples)
-from vllm.benchmarks.endpoint_request_func import (ASYNC_REQUEST_FUNCS,
-                                                   OPENAI_COMPATIBLE_BACKENDS,
-                                                   RequestFuncInput,
-                                                   RequestFuncOutput)
-from vllm.benchmarks.utils import (convert_to_pytorch_benchmark_format,
-                                   write_to_json)
+from vllm.benchmarks.lib.endpoint_request_func import (
+    ASYNC_REQUEST_FUNCS, OPENAI_COMPATIBLE_BACKENDS, RequestFuncInput,
+    RequestFuncOutput)
+from vllm.benchmarks.lib.ready_checker import wait_for_endpoint
+from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
+                                       write_to_json)
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 MILLISECONDS_TO_SECONDS_CONVERSION = 1000
@@ -331,6 +331,7 @@ async def benchmark(
     ramp_up_strategy: Optional[Literal["linear", "exponential"]] = None,
     ramp_up_start_rps: Optional[int] = None,
     ramp_up_end_rps: Optional[int] = None,
+    ready_check_timeout_sec: int = 600,
 ):
     if endpoint_type in ASYNC_REQUEST_FUNCS:
         request_func = ASYNC_REQUEST_FUNCS[endpoint_type]
@@ -359,7 +360,8 @@ async def benchmark(
         extra_body=extra_body,
     )
 
-    test_output = await request_func(request_func_input=test_input)
+    test_output = await wait_for_endpoint(
+        request_func, test_input, timeout_seconds=ready_check_timeout_sec)
     if not test_output.success:
         raise ValueError(
             "Initial test run failed - Please make sure benchmark arguments "
@@ -907,6 +909,13 @@ def add_cli_args(parser: argparse.ArgumentParser):
         help="The ending request rate for ramp-up (RPS). "
         "Needs to be specified when --ramp-up-strategy is used.",
     )
+    parser.add_argument(
+        "--ready-check-timeout-sec",
+        type=int,
+        default=600,
+        help="Maximum time to wait for the endpoint to become ready "
+        "in seconds (default: 600 seconds / 10 minutes).",
+    )
 
 
 def main(args: argparse.Namespace):
@@ -1012,6 +1021,7 @@ def main(args: argparse.Namespace):
             ramp_up_strategy=args.ramp_up_strategy,
             ramp_up_start_rps=args.ramp_up_start_rps,
             ramp_up_end_rps=args.ramp_up_end_rps,
+            ready_check_timeout_sec=args.ready_check_timeout_sec,
         ))
 
     # Save config and results to json
diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py
index 0fe042e2736da..bbd18ca3ae22e 100644
--- a/vllm/benchmarks/throughput.py
+++ b/vllm/benchmarks/throughput.py
@@ -21,8 +21,8 @@ from vllm.benchmarks.datasets import (AIMODataset, BurstGPTDataset,
                                       InstructCoderDataset, RandomDataset,
                                       SampleRequest, ShareGPTDataset,
                                       SonnetDataset, VisionArenaDataset)
-from vllm.benchmarks.utils import (convert_to_pytorch_benchmark_format,
-                                   write_to_json)
+from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
+                                       write_to_json)
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.entrypoints.openai.api_server import (
     build_async_engine_client_from_engine_args)

From 789562c28c143201a1d2ca35f7adcdf54ef832e5 Mon Sep 17 00:00:00 2001
From: "Roberto L. Castro"
 <38211239+LopezCastroRoberto@users.noreply.github.com>
Date: Sun, 3 Aug 2025 09:54:22 +0200
Subject: [PATCH 171/224] Support CUTLASS NVFP4 (w4a4) for Blackwell Geforce
 GPUs (SM120) (#21309)

Signed-off-by: LopezCastroRoberto <roberto.lopez.castro@udc.es>
---
 CMakeLists.txt                                |  21 +-
 .../fp4/nvfp4_blockwise_moe_kernel.cu         |   6 +-
 csrc/quantization/fp4/nvfp4_quant_entry.cu    |  14 +-
 csrc/quantization/fp4/nvfp4_quant_kernels.cu  |   2 +-
 .../quantization/fp4/nvfp4_scaled_mm_entry.cu |  14 +-
 .../fp4/nvfp4_scaled_mm_sm120_kernels.cu      | 285 ++++++++++++++++++
 6 files changed, 329 insertions(+), 13 deletions(-)
 create mode 100644 csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea56b8451f228..e2cc0ccdef515 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -529,6 +529,25 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     endif()
   endif()
 
+  # The nvfp4_scaled_mm_sm120 kernels for GeForce Blackwell SM120 require
+  # CUDA 12.8 or later
+  cuda_archs_loose_intersection(FP4_ARCHS "12.0;12.0a" "${CUDA_ARCHS}")
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND FP4_ARCHS)
+    set(SRCS
+      "csrc/quantization/fp4/nvfp4_quant_kernels.cu"
+      "csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${SRCS}"
+      CUDA_ARCHS "${FP4_ARCHS}")
+    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_GPU_FLAGS "-DENABLE_NVFP4_SM120=1")
+    message(STATUS "Building NVFP4 for archs: ${FP4_ARCHS}")
+  else()
+    message(STATUS "Not building NVFP4 as no compatible archs were found.")
+    # clear FP4_ARCHS
+    set(FP4_ARCHS)
+  endif()
+
   # FP4 Archs and flags
   cuda_archs_loose_intersection(FP4_ARCHS "10.0a" "${CUDA_ARCHS}")
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND FP4_ARCHS)
@@ -541,7 +560,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
       SRCS "${SRCS}"
       CUDA_ARCHS "${FP4_ARCHS}")
     list(APPEND VLLM_EXT_SRC "${SRCS}")
-    list(APPEND VLLM_GPU_FLAGS "-DENABLE_NVFP4=1")
+    list(APPEND VLLM_GPU_FLAGS "-DENABLE_NVFP4_SM100=1")
     list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MOE_SM100=1")
     message(STATUS "Building NVFP4 for archs: ${FP4_ARCHS}")
   else()
diff --git a/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu b/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
index a21ee55b65862..03db5cc196d59 100644
--- a/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
+++ b/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
@@ -335,7 +335,7 @@ void run_fp4_blockwise_scaled_group_mm(
   TORCH_CHECK(status == cutlass::Status::kSuccess, "Failed to run GEMM");
 }
 
-#if defined ENABLE_NVFP4 && ENABLE_NVFP4
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
 constexpr auto FLOAT4_E2M1X2 = at::ScalarType::Byte;
 constexpr auto SF_DTYPE = at::ScalarType::Float8_e4m3fn;
 #endif
@@ -356,7 +356,7 @@ void cutlass_fp4_group_mm(
     const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
     const torch::Tensor& alphas, const torch::Tensor& problem_sizes,
     const torch::Tensor& expert_offsets, const torch::Tensor& sf_offsets) {
-#if defined ENABLE_NVFP4 && ENABLE_NVFP4
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
   // Input validation
   CHECK_INPUT(a, FLOAT4_E2M1X2, "a");
   CHECK_INPUT(b, FLOAT4_E2M1X2, "b");
@@ -398,7 +398,7 @@ void cutlass_fp4_group_mm(
   TORCH_CHECK_NOT_IMPLEMENTED(
       false,
       "No compiled cutlass_fp4_group_mm kernel, vLLM must "
-      "be compiled with ENABLE_NVFP4 for SM100+ and CUDA "
+      "be compiled with ENABLE_NVFP4_SM100 for SM100+ and CUDA "
       "12.8 or above.");
 #endif
 }
diff --git a/csrc/quantization/fp4/nvfp4_quant_entry.cu b/csrc/quantization/fp4/nvfp4_quant_entry.cu
index badbb7e310df0..1b61bd4519fc3 100644
--- a/csrc/quantization/fp4/nvfp4_quant_entry.cu
+++ b/csrc/quantization/fp4/nvfp4_quant_entry.cu
@@ -16,14 +16,15 @@
 
 #include <torch/all.h>
 
-#if defined ENABLE_NVFP4 && ENABLE_NVFP4
-void scaled_fp4_quant_sm100a(torch::Tensor const& output,
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+void scaled_fp4_quant_sm1xxa(torch::Tensor const& output,
                              torch::Tensor const& input,
                              torch::Tensor const& output_sf,
                              torch::Tensor const& input_sf);
 #endif
 
-#if defined ENABLE_NVFP4 && ENABLE_NVFP4
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
 void scaled_fp4_experts_quant_sm100a(
     torch::Tensor& output, torch::Tensor& output_scale,
     torch::Tensor const& input, torch::Tensor const& input_global_scale,
@@ -33,8 +34,9 @@ void scaled_fp4_experts_quant_sm100a(
 
 void scaled_fp4_quant(torch::Tensor& output, torch::Tensor const& input,
                       torch::Tensor& output_sf, torch::Tensor const& input_sf) {
-#if defined ENABLE_NVFP4 && ENABLE_NVFP4
-  return scaled_fp4_quant_sm100a(output, input, output_sf, input_sf);
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+  return scaled_fp4_quant_sm1xxa(output, input, output_sf, input_sf);
 #endif
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled nvfp4 quantization kernel");
 }
@@ -44,7 +46,7 @@ void scaled_fp4_experts_quant(
     torch::Tensor const& input, torch::Tensor const& input_global_scale,
     torch::Tensor const& input_offset_by_experts,
     torch::Tensor const& output_scale_offset_by_experts) {
-#if defined ENABLE_NVFP4 && ENABLE_NVFP4
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
   return scaled_fp4_experts_quant_sm100a(
       output, output_scale, input, input_global_scale, input_offset_by_experts,
       output_scale_offset_by_experts);
diff --git a/csrc/quantization/fp4/nvfp4_quant_kernels.cu b/csrc/quantization/fp4/nvfp4_quant_kernels.cu
index d32911357a953..4e080de151648 100644
--- a/csrc/quantization/fp4/nvfp4_quant_kernels.cu
+++ b/csrc/quantization/fp4/nvfp4_quant_kernels.cu
@@ -332,7 +332,7 @@ template void invokeFP4Quantization(int m, int n, __nv_bfloat16 const* input,
                                     int multiProcessorCount,
                                     cudaStream_t stream);
 
-void scaled_fp4_quant_sm100a(torch::Tensor const& output,
+void scaled_fp4_quant_sm1xxa(torch::Tensor const& output,
                              torch::Tensor const& input,
                              torch::Tensor const& output_sf,
                              torch::Tensor const& input_sf) {
diff --git a/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu b/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu
index 61b75e92dfaa0..9cba2828aac2e 100644
--- a/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu
+++ b/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu
@@ -16,7 +16,7 @@
 
 #include <torch/all.h>
 
-#if defined ENABLE_NVFP4 && ENABLE_NVFP4
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
 void cutlass_scaled_fp4_mm_sm100a(torch::Tensor& D, torch::Tensor const& A,
                                   torch::Tensor const& B,
                                   torch::Tensor const& A_sf,
@@ -24,12 +24,22 @@ void cutlass_scaled_fp4_mm_sm100a(torch::Tensor& D, torch::Tensor const& A,
                                   torch::Tensor const& alpha);
 #endif
 
+#if defined ENABLE_NVFP4_SM120 && ENABLE_NVFP4_SM120
+void cutlass_scaled_fp4_mm_sm120a(torch::Tensor& D, torch::Tensor const& A,
+                                  torch::Tensor const& B,
+                                  torch::Tensor const& A_sf,
+                                  torch::Tensor const& B_sf,
+                                  torch::Tensor const& alpha);
+#endif
+
 void cutlass_scaled_fp4_mm(torch::Tensor& D, torch::Tensor const& A,
                            torch::Tensor const& B, torch::Tensor const& A_sf,
                            torch::Tensor const& B_sf,
                            torch::Tensor const& alpha) {
-#if defined ENABLE_NVFP4 && ENABLE_NVFP4
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
   return cutlass_scaled_fp4_mm_sm100a(D, A, B, A_sf, B_sf, alpha);
+#elif defined ENABLE_NVFP4_SM120 && ENABLE_NVFP4_SM120
+  return cutlass_scaled_fp4_mm_sm120a(D, A, B, A_sf, B_sf, alpha);
 #endif
   TORCH_CHECK_NOT_IMPLEMENTED(false,
                               "No compiled nvfp4 mm kernel, vLLM should "
diff --git a/csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu b/csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu
new file mode 100644
index 0000000000000..89de23b76e65d
--- /dev/null
+++ b/csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <torch/all.h>
+
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+#include "cutlass_extensions/common.hpp"
+
+#include "cutlass/cutlass.h"
+
+#include "cutlass/gemm/collective/collective_builder.hpp"
+#include "cutlass/epilogue/collective/collective_builder.hpp"
+#include "cutlass/gemm/device/gemm_universal_adapter.h"
+#include "cutlass/gemm/kernel/gemm_universal.hpp"
+
+#include "cutlass/util/packed_stride.hpp"
+
+#include "core/math.hpp"
+
+using namespace cute;
+
+#define CHECK_TYPE(x, st, m) \
+  TORCH_CHECK(x.scalar_type() == st, ": Inconsistency of Tensor type:", m)
+#define CHECK_TH_CUDA(x, m) \
+  TORCH_CHECK(x.is_cuda(), m, ": must be a CUDA tensor")
+#define CHECK_CONTIGUOUS(x, m) \
+  TORCH_CHECK(x.is_contiguous(), m, ": must be contiguous")
+#define CHECK_INPUT(x, st, m) \
+  CHECK_TH_CUDA(x, m);        \
+  CHECK_CONTIGUOUS(x, m);     \
+  CHECK_TYPE(x, st, m)
+
+constexpr auto FLOAT4_E2M1X2 = at::ScalarType::Byte;
+constexpr auto SF_DTYPE = at::ScalarType::Float8_e4m3fn;
+
+struct sm120_fp4_config_M256 {
+  using ClusterShape = Shape<_1, _1, _1>;
+  using MmaTileShape = Shape<_128, _128, _128>;
+  using PerSmTileShape_MNK = Shape<_128, _128, _128>;
+};
+
+struct sm120_fp4_config_default {
+  using ClusterShape = Shape<_1, _1, _1>;
+  using MmaTileShape = Shape<_256, _128, _128>;
+  using PerSmTileShape_MNK = Shape<_256, _128, _128>;
+};
+
+template <typename Config, typename OutType>
+struct Fp4GemmSm120 {
+  using ElementA = cutlass::nv_float4_t<cutlass::float_e2m1_t>;
+  using LayoutATag = cutlass::layout::RowMajor;
+  static constexpr int AlignmentA = 32;
+
+  using ElementB = cutlass::nv_float4_t<cutlass::float_e2m1_t>;
+  using LayoutBTag = cutlass::layout::ColumnMajor;
+  static constexpr int AlignmentB = 32;
+
+  using ElementD = OutType;
+  using ElementC = OutType;
+  using LayoutCTag = cutlass::layout::RowMajor;
+  using LayoutDTag = cutlass::layout::RowMajor;
+  static constexpr int AlignmentD = 128 / cutlass::sizeof_bits<ElementD>::value;
+  static constexpr int AlignmentC = 128 / cutlass::sizeof_bits<ElementC>::value;
+
+  using ElementAccumulator = float;
+  using ArchTag = cutlass::arch::Sm120;
+  using OperatorClass = cutlass::arch::OpClassBlockScaledTensorOp;
+
+  using MmaTileShape = typename Config::MmaTileShape;
+  using ClusterShape = typename Config::ClusterShape;
+  using PerSmTileShape_MNK = typename Config::PerSmTileShape_MNK;
+
+  using CollectiveEpilogue =
+      typename cutlass::epilogue::collective::CollectiveBuilder<
+          ArchTag, OperatorClass, PerSmTileShape_MNK, ClusterShape,
+          cutlass::epilogue::collective::EpilogueTileAuto, ElementAccumulator,
+          ElementAccumulator, ElementC, LayoutCTag, AlignmentC, ElementD,
+          LayoutDTag, AlignmentD,
+          cutlass::epilogue::collective::EpilogueScheduleAuto>::CollectiveOp;
+
+  using CollectiveMainloop =
+      typename cutlass::gemm::collective::CollectiveBuilder<
+          ArchTag, OperatorClass, ElementA, LayoutATag, AlignmentA, ElementB,
+          LayoutBTag, AlignmentB, ElementAccumulator, MmaTileShape,
+          ClusterShape,
+          cutlass::gemm::collective::StageCountAutoCarveout<static_cast<int>(
+              sizeof(typename CollectiveEpilogue::SharedStorage))>,
+          cutlass::gemm::collective::KernelScheduleAuto>::CollectiveOp;
+
+  using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
+      Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>;
+
+  using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
+};
+
+template <typename Gemm>
+typename Gemm::Arguments args_from_options(at::Tensor& D, at::Tensor const& A,
+                                           at::Tensor const& B,
+                                           at::Tensor const& A_sf,
+                                           at::Tensor const& B_sf,
+                                           torch::Tensor const& alpha, int M,
+                                           int N, int K) {
+  using ElementA = typename Gemm::ElementA;
+  using ElementB = typename Gemm::ElementB;
+  using ElementD = typename Gemm::ElementD;
+  using ElementSFA = cutlass::float_ue4m3_t;
+  using ElementSFB = cutlass::float_ue4m3_t;
+  using ElementCompute = float;
+
+  using StrideA = typename Gemm::GemmKernel::StrideA;
+  using StrideB = typename Gemm::GemmKernel::StrideB;
+  using StrideC = typename Gemm::GemmKernel::StrideC;
+  using StrideD = typename Gemm::GemmKernel::StrideD;
+
+  using Sm1xxBlkScaledConfig =
+      typename Gemm::GemmKernel::CollectiveMainloop::Sm1xxBlkScaledConfig;
+
+  auto stride_A = cutlass::make_cute_packed_stride(StrideA{}, {M, K, 1});
+  auto stride_B = cutlass::make_cute_packed_stride(StrideB{}, {N, K, 1});
+  auto stride_D = cutlass::make_cute_packed_stride(StrideD{}, {M, N, 1});
+
+  auto layout_SFA = Sm1xxBlkScaledConfig::tile_atom_to_shape_SFA(
+      cute::make_shape(M, N, K, 1));
+  auto layout_SFB = Sm1xxBlkScaledConfig::tile_atom_to_shape_SFB(
+      cute::make_shape(M, N, K, 1));
+
+  typename Gemm::Arguments arguments{
+      cutlass::gemm::GemmUniversalMode::kGemm,
+      {M, N, K, 1},
+      {static_cast<ElementA const*>(A.data_ptr()), stride_A,
+       static_cast<ElementB const*>(B.data_ptr()), stride_B,
+       static_cast<ElementSFA const*>(A_sf.data_ptr()), layout_SFA,
+       static_cast<ElementSFB const*>(B_sf.data_ptr()), layout_SFB},
+      {{},
+       static_cast<ElementD const*>(D.data_ptr()),
+       stride_D,
+       static_cast<ElementD*>(D.data_ptr()),
+       stride_D}};
+  auto& fusion_args = arguments.epilogue.thread;
+  fusion_args.alpha_ptr = static_cast<ElementCompute const*>(alpha.data_ptr());
+
+  return arguments;
+}
+
+template <typename Gemm>
+void runGemm(at::Tensor& D, at::Tensor const& A, at::Tensor const& B,
+             at::Tensor const& A_sf, at::Tensor const& B_sf,
+             torch::Tensor const& alpha, int M, int N, int K,
+             cudaStream_t stream) {
+  Gemm gemm;
+
+  auto arguments = args_from_options<Gemm>(D, A, B, A_sf, B_sf, alpha, M, N, K);
+
+  size_t workspace_size = Gemm::get_workspace_size(arguments);
+  auto const workspace_options =
+      torch::TensorOptions().dtype(torch::kUInt8).device(A.device());
+  auto workspace = torch::empty(workspace_size, workspace_options);
+
+  CUTLASS_CHECK(gemm.can_implement(arguments));
+
+  CUTLASS_CHECK(gemm.initialize(arguments, workspace.data_ptr(), stream));
+
+  CUTLASS_CHECK(gemm.run(arguments, workspace.data_ptr(), stream));
+}
+
+void cutlass_fp4_bf16_gemm_dispatch(torch::Tensor& D, torch::Tensor const& A,
+                                    torch::Tensor const& B,
+                                    torch::Tensor const& A_sf,
+                                    torch::Tensor const& B_sf,
+                                    torch::Tensor const& alpha, int m, int n,
+                                    int k, cudaStream_t stream) {
+  uint32_t const mp2 = std::max(static_cast<uint32_t>(16), next_pow_2(m));
+  if (mp2 <= 256) {
+    runGemm<Fp4GemmSm120<sm120_fp4_config_M256, cutlass::bfloat16_t>::Gemm>(
+        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+  } else {
+    runGemm<Fp4GemmSm120<sm120_fp4_config_default, cutlass::bfloat16_t>::Gemm>(
+        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+  }
+}
+
+void cutlass_fp4_f16_gemm_dispatch(torch::Tensor& D, torch::Tensor const& A,
+                                   torch::Tensor const& B,
+                                   torch::Tensor const& A_sf,
+                                   torch::Tensor const& B_sf,
+                                   torch::Tensor const& alpha, int m, int n,
+                                   int k, cudaStream_t stream) {
+  uint32_t const mp2 = std::max(static_cast<uint32_t>(16), next_pow_2(m));
+  if (mp2 <= 256) {
+    runGemm<Fp4GemmSm120<sm120_fp4_config_M256, cutlass::half_t>::Gemm>(
+        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+  } else {
+    runGemm<Fp4GemmSm120<sm120_fp4_config_default, cutlass::half_t>::Gemm>(
+        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+  }
+}
+
+void cutlass_scaled_fp4_mm_sm120a(torch::Tensor& D, torch::Tensor const& A,
+                                  torch::Tensor const& B,
+                                  torch::Tensor const& A_sf,
+                                  torch::Tensor const& B_sf,
+                                  torch::Tensor const& alpha) {
+#if defined(CUTLASS_ARCH_MMA_SM120_SUPPORTED)
+  CHECK_INPUT(A, FLOAT4_E2M1X2, "a");
+  CHECK_INPUT(B, FLOAT4_E2M1X2, "b");
+
+  CHECK_INPUT(A_sf, SF_DTYPE, "scale_a");
+  CHECK_INPUT(B_sf, SF_DTYPE, "scale_b");
+
+  CHECK_INPUT(alpha, at::ScalarType::Float, "alpha");
+
+  TORCH_CHECK(A.dim() == 2, "a must be a matrix");
+  TORCH_CHECK(B.dim() == 2, "b must be a matrix");
+  TORCH_CHECK(A.sizes()[1] == B.sizes()[1],
+              "a and b shapes cannot be multiplied (", A.sizes()[0], "x",
+              A.sizes()[1], " and ", B.sizes()[0], "x", B.sizes()[1], ")");
+
+  auto const m = A.sizes()[0];
+  auto const n = B.sizes()[0];
+  auto const k = A.sizes()[1] * 2;
+
+  constexpr int alignment = 32;
+  TORCH_CHECK(k % alignment == 0, "Expected k to be divisible by ", alignment,
+              ", but got a shape: (", A.sizes()[0], "x", A.sizes()[1],
+              "), k: ", k, ".");
+  TORCH_CHECK(n % alignment == 0, "Expected n to be divisible by ", alignment,
+              ", but got b shape: (", B.sizes()[0], "x", B.sizes()[1], ").");
+
+  auto round_up = [](int x, int y) { return (x + y - 1) / y * y; };
+  int rounded_m = round_up(m, 128);
+  int rounded_n = round_up(n, 128);
+  // Since k is divisible by 32 (alignment), k / 16 is guaranteed to be an
+  // integer.
+  int rounded_k = round_up(k / 16, 4);
+
+  TORCH_CHECK(A_sf.dim() == 2, "scale_a must be a matrix");
+  TORCH_CHECK(B_sf.dim() == 2, "scale_b must be a matrix");
+  TORCH_CHECK(A_sf.sizes()[1] == B_sf.sizes()[1],
+              "scale_a and scale_b shapes cannot be multiplied (",
+              A_sf.sizes()[0], "x", A_sf.sizes()[1], " and ", B_sf.sizes()[0],
+              "x", B_sf.sizes()[1], ")");
+  TORCH_CHECK(A_sf.sizes()[0] == rounded_m && A_sf.sizes()[1] == rounded_k,
+              "scale_a must be padded and swizzled to a shape (", rounded_m,
+              "x", rounded_k, "), but got a shape (", A_sf.sizes()[0], "x",
+              A_sf.sizes()[1], ")");
+  TORCH_CHECK(B_sf.sizes()[0] == rounded_n && B_sf.sizes()[1] == rounded_k,
+              "scale_b must be padded and swizzled to a shape (", rounded_n,
+              "x", rounded_k, "), but got a shape (", B_sf.sizes()[0], "x",
+              B_sf.sizes()[1], ")");
+
+  auto out_dtype = D.dtype();
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(A));
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream(A.get_device());
+
+  if (out_dtype == at::ScalarType::BFloat16) {
+    return cutlass_fp4_bf16_gemm_dispatch(D, A, B, A_sf, B_sf, alpha, m, n, k,
+                                          stream);
+  } else if (out_dtype == at::ScalarType::Half) {
+    return cutlass_fp4_f16_gemm_dispatch(D, A, B, A_sf, B_sf, alpha, m, n, k,
+                                         stream);
+  } else {
+    TORCH_CHECK(false, "Unsupported output data type of nvfp4 mm sm120 (",
+                out_dtype, ")");
+  }
+#else
+  TORCH_CHECK(false,
+              "Unsupported CUTLASS version. Set VLLM_CUTLASS_SRC_DIR to "
+              "a CUTLASS 3.8 source directory to enable support.");
+#endif  // defined(CUTLASS_ARCH_MMA_SM120_SUPPORTED)
+}
\ No newline at end of file

From 7de45db9a5b95073c3f99eec75ae510d347d625f Mon Sep 17 00:00:00 2001
From: Ning Xie <andy.xning@gmail.com>
Date: Sun, 3 Aug 2025 15:55:20 +0800
Subject: [PATCH 172/224] [Misc] update doc comment for send (#22026)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
---
 .../device_communicators/base_device_communicator.py            | 2 +-
 vllm/distributed/device_communicators/cuda_communicator.py      | 2 +-
 vllm/distributed/parallel_state.py                              | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/distributed/device_communicators/base_device_communicator.py b/vllm/distributed/device_communicators/base_device_communicator.py
index dc5923cdc5a0d..127a340fc6c6d 100644
--- a/vllm/distributed/device_communicators/base_device_communicator.py
+++ b/vllm/distributed/device_communicators/base_device_communicator.py
@@ -219,7 +219,7 @@ class DeviceCommunicatorBase:
         return output_tensor
 
     def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None:
-        """Sends a tensor to the destination rank in a non-blocking way"""
+        """Sends a tensor to the destination rank in a blocking way"""
         """NOTE: `dst` is the local rank of the destination rank."""
         if dst is None:
             dst = (self.rank_in_group + 1) % self.world_size
diff --git a/vllm/distributed/device_communicators/cuda_communicator.py b/vllm/distributed/device_communicators/cuda_communicator.py
index e4804691f0f65..4ab8f3d938fcf 100644
--- a/vllm/distributed/device_communicators/cuda_communicator.py
+++ b/vllm/distributed/device_communicators/cuda_communicator.py
@@ -179,7 +179,7 @@ class CudaCommunicator(DeviceCommunicatorBase):
         return output.movedim(0, dim).contiguous()
 
     def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None:
-        """Sends a tensor to the destination rank in a non-blocking way"""
+        """Sends a tensor to the destination rank in a blocking way"""
         """NOTE: `dst` is the local rank of the destination rank."""
         if dst is None:
             dst = (self.rank_in_group + 1) % self.world_size
diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 1f7a14920c418..ee581124db510 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -782,7 +782,7 @@ class GroupCoordinator:
         torch.distributed.barrier(group=self.cpu_group)
 
     def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None:
-        """Sends a tensor to the destination rank in a non-blocking way"""
+        """Sends a tensor to the destination rank in a blocking way"""
         """NOTE: `dst` is the local rank of the destination rank."""
         self.device_communicator.send(tensor, dst)
 

From 24d1dffbeb0d27cf42904153f56e919fb01b5a07 Mon Sep 17 00:00:00 2001
From: H <linhaibin.eric@gmail.com>
Date: Sun, 3 Aug 2025 03:04:45 -0700
Subject: [PATCH 173/224] [executor] feat: add supports_pp attr to executors
 (#21786)

Signed-off-by: Haibin Lin <haibin.lin@bytedance.com>
---
 vllm/engine/arg_utils.py                     | 20 ++++++++++++--------
 vllm/executor/executor_base.py               |  1 +
 vllm/v1/executor/multiproc_executor.py       |  2 ++
 vllm/v1/executor/ray_distributed_executor.py |  2 ++
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 47b3efa6af726..c94e440e5c845 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1490,14 +1490,18 @@ class EngineArgs:
                 and _warn_or_fallback("Engine in background thread")):
             return False
 
-        if (self.pipeline_parallel_size > 1
-                and self.distributed_executor_backend
-                not in (ParallelConfig.distributed_executor_backend, "ray",
-                        "mp", "external_launcher")):
-            name = "Pipeline Parallelism without Ray distributed executor " \
-                    "or multiprocessing executor or external launcher"
-            _raise_or_fallback(feature_name=name, recommend_to_remove=False)
-            return False
+        if self.pipeline_parallel_size > 1:
+            supports_pp = getattr(self.distributed_executor_backend,
+                                  'supports_pp', False)
+            if not supports_pp and self.distributed_executor_backend not in (
+                    ParallelConfig.distributed_executor_backend, "ray", "mp",
+                    "external_launcher"):
+                name = "Pipeline Parallelism without Ray distributed " \
+                        "executor or multiprocessing executor or external " \
+                        "launcher"
+                _raise_or_fallback(feature_name=name,
+                                   recommend_to_remove=False)
+                return False
 
         # The platform may be supported on V1, but off by default for now.
         if not current_platform.default_v1(  # noqa: SIM103
diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py
index 97d0d6f08b81e..813232cd19281 100644
--- a/vllm/executor/executor_base.py
+++ b/vllm/executor/executor_base.py
@@ -35,6 +35,7 @@ class ExecutorBase(ABC):
     """
 
     uses_ray: bool  # whether the executor uses Ray for orchestration.
+    supports_pp: bool = False  # whether the executor supports PP
 
     def __init__(
         self,
diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
index d90051c3224fd..0db3bcd7fb408 100644
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -41,6 +41,8 @@ logger = init_logger(__name__)
 
 class MultiprocExecutor(Executor):
 
+    supports_pp: bool = True
+
     def _init_executor(self) -> None:
         # Call self.shutdown at exit to clean up
         # and ensure workers will be terminated.
diff --git a/vllm/v1/executor/ray_distributed_executor.py b/vllm/v1/executor/ray_distributed_executor.py
index b86ac048f5206..c05ad1966d611 100644
--- a/vllm/v1/executor/ray_distributed_executor.py
+++ b/vllm/v1/executor/ray_distributed_executor.py
@@ -43,6 +43,8 @@ class FutureWrapper(Future):
 class RayDistributedExecutor(RayDistributedExecutorV0, Executor):
     """Ray distributed executor using Ray Compiled Graphs."""
 
+    supports_pp: bool = True
+
     def _init_executor(self) -> None:
         super()._init_executor()
 

From aefeea0fde0fbe5871a0799fad583e6ed6fdf903 Mon Sep 17 00:00:00 2001
From: David Ben-David <sdavidbd@gmail.com>
Date: Sun, 3 Aug 2025 14:03:40 +0300
Subject: [PATCH 174/224] [V1] [P/D] Refactor KV Connector Path (#21980)

Signed-off-by: David Ben-David <davidb@pliops.com>
Co-authored-by: David Ben-David <davidb@pliops.com>
---
 .../unit/test_output_aggreagator.py           | 20 +++++-
 .../unit/test_remote_decode_lifecycle.py      |  8 ++-
 .../unit/test_remote_prefill_lifecycle.py     |  8 ++-
 tests/v1/kv_connector/unit/utils.py           |  8 ++-
 .../kv_transfer/kv_connector/utils.py         | 16 +++--
 vllm/sequence.py                              | 13 ++--
 vllm/v1/core/sched/scheduler.py               | 12 ++--
 vllm/v1/outputs.py                            | 13 ++--
 vllm/v1/worker/gpu_model_runner.py            | 30 +++------
 vllm/v1/worker/gpu_worker.py                  | 22 ++++---
 .../worker/kv_connector_model_runner_mixin.py | 63 ++++++++++++++++---
 vllm/v1/worker/tpu_model_runner.py            |  9 +--
 12 files changed, 142 insertions(+), 80 deletions(-)

diff --git a/tests/v1/kv_connector/unit/test_output_aggreagator.py b/tests/v1/kv_connector/unit/test_output_aggreagator.py
index cad73f68e9f15..5d2b27a9eb4da 100644
--- a/tests/v1/kv_connector/unit/test_output_aggreagator.py
+++ b/tests/v1/kv_connector/unit/test_output_aggreagator.py
@@ -4,7 +4,7 @@ from concurrent.futures import Future
 from typing import Optional
 
 from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
-from vllm.v1.outputs import ModelRunnerOutput
+from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
 
 
 class DummyModelRunnerOutput(ModelRunnerOutput):
@@ -12,8 +12,16 @@ class DummyModelRunnerOutput(ModelRunnerOutput):
     def __init__(self,
                  finished_sending: Optional[set[str]] = None,
                  finished_recving: Optional[set[str]] = None):
-        self.finished_sending = finished_sending
-        self.finished_recving = finished_recving
+        self.kv_connector_output = KVConnectorOutput(
+            finished_sending=finished_sending,
+            finished_recving=finished_recving,
+        )
+
+    def __repr__(self):
+        return (
+            f"DummyModelRunnerOutput("
+            f"finished_sending={self.kv_connector_output.finished_sending},"
+            f"finished_recving={self.kv_connector_output.finished_recving})")
 
 
 def test_aggregate_workers_output():
@@ -27,6 +35,7 @@ def test_aggregate_workers_output():
     aggregated = aggregator.aggregate([output1, output2])
 
     assert aggregated is output1
+    aggregated = aggregated.kv_connector_output
     assert aggregated.finished_sending is None
     assert aggregated.finished_recving is None
 
@@ -38,6 +47,7 @@ def test_aggregate_workers_output():
     aggregated = aggregator.aggregate([output1, output2])
 
     assert aggregated is output1
+    aggregated = aggregated.kv_connector_output
     assert aggregated.finished_sending == {'req1'}
     assert aggregated.finished_recving is None
 
@@ -49,6 +59,7 @@ def test_aggregate_workers_output():
     aggregated = aggregator.aggregate([output1, output2])
 
     assert aggregated is output1
+    aggregated = aggregated.kv_connector_output
     assert aggregated.finished_sending is None
     assert aggregated.finished_recving == {'req2'}
 
@@ -70,6 +81,7 @@ def test_async_aggregate_workers_output():
     assert result_future.done()
     aggregated = result_future.result()
     assert aggregated is output1
+    aggregated = aggregated.kv_connector_output
     assert aggregated.finished_sending is None
     assert aggregated.finished_recving is None
 
@@ -87,6 +99,7 @@ def test_async_aggregate_workers_output():
     assert result_future.done()
     aggregated = result_future.result()
     assert aggregated is output1
+    aggregated = aggregated.kv_connector_output
     assert aggregated.finished_sending == {'req1'}
     assert aggregated.finished_recving is None
 
@@ -104,5 +117,6 @@ def test_async_aggregate_workers_output():
     assert result_future.done()
     aggregated = result_future.result()
     assert aggregated is output1
+    aggregated = aggregated.kv_connector_output
     assert aggregated.finished_sending is None
     assert aggregated.finished_recving == {'req2'}
diff --git a/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py b/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py
index 12a71d97e8d29..76394a540aacd 100644
--- a/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py
+++ b/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import copy
 
-from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT
+from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
 from vllm.v1.request import FinishReason, RequestStatus
 
 from .utils import (assert_scheduler_empty, create_model_runner_output,
@@ -86,7 +86,8 @@ def test_basic_lifecycle():
 
     # (3b): execute_model()
     model_runner_output = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
-    model_runner_output.finished_sending = [request_id]
+    model_runner_output.kv_connector_output = KVConnectorOutput(
+        finished_sending=[request_id])
 
     # (3c): update_from_output()
     scheduler.update_from_output(scheduler_output, model_runner_output)
@@ -176,7 +177,8 @@ def test_prefix_cache_lifecycle():
     scheduler_output = scheduler.schedule()
     scheduler.schedule()
     model_runner_output = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
-    model_runner_output.finished_sending = [request_remote.request_id]
+    model_runner_output.kv_connector_output = KVConnectorOutput(
+        finished_sending=[request_remote.request_id])
     scheduler.update_from_output(scheduler_output, model_runner_output)
     _ = scheduler.schedule()
     assert_scheduler_empty(scheduler)
diff --git a/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py b/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
index f89970bf2c807..3d52ea526d96b 100644
--- a/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
+++ b/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import copy
 
-from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT
+from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
 from vllm.v1.request import FinishReason, RequestStatus
 
 from .utils import (assert_scheduler_empty, create_model_runner_output,
@@ -72,7 +72,8 @@ def test_basic_lifecycle():
 
     # (2b): forward(): request finishes recv.
     model_runner_output = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
-    model_runner_output.finished_recving = [request_id]
+    model_runner_output.kv_connector_output = KVConnectorOutput(
+        finished_recving=[request_id])
 
     # (2c): update_from_output():
     engine_core_outputs = scheduler.update_from_output(scheduler_output,
@@ -309,7 +310,8 @@ def test_full_block_prompt():
     # # STEP (2): Recv.
     scheduler_output = scheduler.schedule()
     model_runner_output = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
-    model_runner_output.finished_recving = [request_id]
+    model_runner_output.kv_connector_output = KVConnectorOutput(
+        finished_recving=[request_id])
     scheduler.update_from_output(scheduler_output, model_runner_output)
     assert len(scheduler.waiting) == 1
     assert (request_id in scheduler.finished_recving_kv_req_ids)
diff --git a/tests/v1/kv_connector/unit/utils.py b/tests/v1/kv_connector/unit/utils.py
index 480a7074cdf4e..291c84d117cb6 100644
--- a/tests/v1/kv_connector/unit/utils.py
+++ b/tests/v1/kv_connector/unit/utils.py
@@ -17,7 +17,7 @@ from vllm.v1.core.kv_cache_manager import KVCacheBlocks
 from vllm.v1.core.sched.scheduler import Scheduler
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                         KVCacheGroupSpec)
-from vllm.v1.outputs import ModelRunnerOutput
+from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
 from vllm.v1.request import Request
 from vllm.v1.structured_output import StructuredOutputManager
 
@@ -188,8 +188,10 @@ def create_model_runner_output(
         logprobs=None,
         prompt_logprobs_dict={},
         pooler_output=None,
-        finished_sending=finished_sending,
-        finished_recving=finished_recving,
+        kv_connector_output=KVConnectorOutput(
+            finished_sending=finished_sending,
+            finished_recving=finished_recving,
+        ),
     )
 
 
diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
index 559c233947ce8..1a11cb6d0189a 100644
--- a/vllm/distributed/kv_transfer/kv_connector/utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -16,7 +16,7 @@ from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.distributed.kv_transfer.kv_connector.factory import (
     KVConnectorFactory)
 from vllm.logger import init_logger
-from vllm.v1.outputs import ModelRunnerOutput
+from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
 
 logger = init_logger(__name__)
 
@@ -129,7 +129,7 @@ class KVOutputAggregator:
     def aggregate(self,
                   outputs: list[ModelRunnerOutput],
                   output_rank: int = 0) -> ModelRunnerOutput:
-        # aggregate finished_sending, finished_recving from all workers
+        # aggregate kv_connector_output from all workers
 
         def update_finished_set(req_ids: Optional[set[str]],
                                 remaining_count_dict: dict[str, int],
@@ -143,6 +143,7 @@ class KVOutputAggregator:
         finished_sending = set[str]()
         finished_recving = set[str]()
         for output in outputs:
+            output = output.kv_connector_output or KVConnectorOutput()
             update_finished_set(output.finished_sending,
                                 self._send_remaining_count, finished_sending)
             update_finished_set(output.finished_recving,
@@ -151,13 +152,10 @@ class KVOutputAggregator:
         # select output of the worker specified by output_rank
         output = outputs[output_rank]
 
-        # set the aggregated finished_sending / finished_recving
-        # if output.finished_sending/recving is not empty, but the other ranks
-        # still have unfinished send/recv, we want to set the aggregated
-        # finished_sending/recving to None until all ranks have finished
-        # send/recv
-        output.finished_sending = finished_sending if finished_sending else None
-        output.finished_recving = finished_recving if finished_recving else None
+        output.kv_connector_output = KVConnectorOutput(
+            finished_sending=finished_sending or None,
+            finished_recving=finished_recving or None,
+        )
 
         return output
 
diff --git a/vllm/sequence.py b/vllm/sequence.py
index fe87b52f9df15..6e65a2bd03189 100644
--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -10,7 +10,7 @@ from collections.abc import Mapping
 from collections.abc import Sequence as GenericSequence
 from dataclasses import dataclass, field
 from functools import reduce
-from typing import Any, Callable, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 import msgspec
 import torch
@@ -21,6 +21,10 @@ from vllm.multimodal import MultiModalKwargs, MultiModalPlaceholderDict
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import RequestOutputKind, SamplingParams
 
+if TYPE_CHECKING:
+    from vllm.v1.worker.kv_connector_model_runner_mixin import (
+        KVConnectorOutput)
+
 VLLM_TOKEN_ID_ARRAY_TYPE = "l"
 
 VLLM_INVALID_TOKEN_ID = -1
@@ -1159,14 +1163,11 @@ class IntermediateTensors:
     states and residuals to be sent to the next stage. This data structure
     contains the hidden states and residuals for a request.
     
-    Each stage also needs to handle its own finished_sending and 
-    finished_recving in case of kv transfer.
+    Each stage also needs to handle its own kv_connector_output.
     """
 
     tensors: dict[str, torch.Tensor]
-    # [req_ids]
-    finished_sending: Optional[set[str]] = None
-    finished_recving: Optional[set[str]] = None
+    kv_connector_output: Optional["KVConnectorOutput"] = None
 
     def __init__(self, tensors):
         # manually define this function, so that
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index 446f98034cb8b..49a744cfec69a 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -30,7 +30,7 @@ from vllm.v1.engine import (EngineCoreEventType, EngineCoreOutput,
                             EngineCoreOutputs)
 from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.metrics.stats import SchedulerStats
-from vllm.v1.outputs import ModelRunnerOutput
+from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
 from vllm.v1.request import Request, RequestStatus
 from vllm.v1.spec_decode.metrics import SpecDecodingStats
 from vllm.v1.structured_output import StructuredOutputManager
@@ -884,7 +884,9 @@ class Scheduler(SchedulerInterface):
             self.waiting.remove_requests(stopped_preempted_reqs)
 
         # KV Connector: update state for finished KV Transfers.
-        self._update_from_kv_xfer_finished(model_runner_output)
+        if model_runner_output.kv_connector_output:
+            self._update_from_kv_xfer_finished(
+                model_runner_output.kv_connector_output)
 
         # Create EngineCoreOutputs for all clients that have requests with
         # outputs in this step.
@@ -1128,7 +1130,7 @@ class Scheduler(SchedulerInterface):
         return True
 
     def _update_from_kv_xfer_finished(self,
-                                      model_runner_output: ModelRunnerOutput):
+                                      kv_connector_output: KVConnectorOutput):
         """
         KV Connector: update the scheduler state based on the output.
 
@@ -1139,9 +1141,9 @@ class Scheduler(SchedulerInterface):
             scheduler the request during the next step.
         """
         # KV Connector:: update recv and send status from last step.
-        for req_id in (model_runner_output.finished_recving or ()):
+        for req_id in (kv_connector_output.finished_recving or ()):
             logger.debug("Finished recving KV transfer for request %s", req_id)
             self.finished_recving_kv_req_ids.add(req_id)
-        for req_id in (model_runner_output.finished_sending or ()):
+        for req_id in (kv_connector_output.finished_sending or ()):
             logger.debug("Finished sending KV transfer for request %s", req_id)
             self._free_blocks(self.requests[req_id])
diff --git a/vllm/v1/outputs.py b/vllm/v1/outputs.py
index f78623f571b2d..7d7cd0c94dd04 100644
--- a/vllm/v1/outputs.py
+++ b/vllm/v1/outputs.py
@@ -71,6 +71,13 @@ class SamplerOutput:
     logprobs_tensors: Optional[LogprobsTensors]
 
 
+@dataclass
+class KVConnectorOutput:
+    # [req_ids]
+    finished_sending: Optional[set[str]] = None
+    finished_recving: Optional[set[str]] = None
+
+
 # ModelRunnerOutput is serialized and sent to the scheduler process.
 # This is expensive for torch.Tensor so prefer to use list instead.
 @dataclass
@@ -104,9 +111,7 @@ class ModelRunnerOutput:
     # [num_reqs, hidden_size]
     pooler_output: list[Optional[torch.Tensor]]
 
-    # [req_ids]
-    finished_sending: Optional[set[str]] = None
-    finished_recving: Optional[set[str]] = None
+    kv_connector_output: Optional[KVConnectorOutput] = None
 
     # req_id -> num_nans_in_logits
     num_nans_in_logits: Optional[dict[str, int]] = None
@@ -119,6 +124,4 @@ EMPTY_MODEL_RUNNER_OUTPUT = ModelRunnerOutput(req_ids=[],
                                               logprobs=None,
                                               prompt_logprobs_dict={},
                                               pooler_output=[],
-                                              finished_sending=None,
-                                              finished_recving=None,
                                               num_nans_in_logits=None)
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 42cef6c5733d2..041687ae28b20 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -69,7 +69,7 @@ from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
 from vllm.v1.spec_decode.ngram_proposer import NgramProposer
 from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
 from vllm.v1.worker.kv_connector_model_runner_mixin import (
-    KVConnectorModelRunnerMixin)
+    KVConnectorModelRunnerMixin, KVConnectorOutput)
 from vllm.v1.worker.lora_model_runner_mixin import LoRAModelRunnerMixin
 
 from ..sample.logits_processor import LogitsProcessorManager
@@ -1423,8 +1423,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         hidden_states: torch.Tensor,
         num_scheduled_tokens: int,
         num_scheduled_tokens_np: np.ndarray,
-        finished_sending: Optional[set[str]],
-        finished_recving: Optional[set[str]],
+        kv_connector_output: Optional[KVConnectorOutput],
     ) -> ModelRunnerOutput:
         assert self.input_batch.num_reqs ==\
             len(self.input_batch.pooling_params), \
@@ -1459,8 +1458,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             logprobs=None,
             prompt_logprobs_dict={},
             pooler_output=pooler_output,
-            finished_sending=finished_sending,
-            finished_recving=finished_recving,
+            kv_connector_output=kv_connector_output,
         )
 
     @torch.inference_mode()
@@ -1564,8 +1562,8 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 num_tokens=num_input_tokens,
                 num_tokens_across_dp=num_tokens_across_dp,
                 skip_cuda_graphs=skip_cuda_graphs,
-        ):
-            self.maybe_setup_kv_connector(scheduler_output)
+        ), self.maybe_get_kv_connector_output(
+                scheduler_output) as kv_connector_output:
 
             model_output = self.model(
                 input_ids=input_ids,
@@ -1578,10 +1576,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 ),
             )
 
-            self.maybe_wait_for_kv_save()
-            finished_sending, finished_recving = (
-                self.get_finished_kv_transfers(scheduler_output))
-
         if self.use_aux_hidden_state_outputs:
             hidden_states, aux_hidden_states = model_output
         else:
@@ -1597,20 +1591,17 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             == "external_launcher" and len(get_pp_group().ranks) > 0
         if not get_pp_group().is_last_rank:
             # For mid-pipeline stages, return the hidden states.
-            if not broadcast_pp_output:
-                if finished_sending or finished_recving:
-                    hidden_states.finished_sending = finished_sending
-                    hidden_states.finished_recving = finished_recving
-                return hidden_states
             assert isinstance(hidden_states, IntermediateTensors)
+            if not broadcast_pp_output:
+                hidden_states.kv_connector_output = kv_connector_output
+                return hidden_states
             get_pp_group().send_tensor_dict(hidden_states.tensors,
                                             all_gather_group=get_tp_group())
             logits = None
         else:
             if self.input_batch.pooling_params:
                 return self._pool(hidden_states, num_scheduled_tokens,
-                                  num_scheduled_tokens_np, finished_sending,
-                                  finished_recving)
+                                  num_scheduled_tokens_np, kv_connector_output)
 
             sample_hidden_states = hidden_states[logits_indices]
             logits = self.model.compute_logits(sample_hidden_states, None)
@@ -1760,8 +1751,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             logprobs=logprobs_lists,
             prompt_logprobs_dict=prompt_logprobs_dict,
             pooler_output=[],
-            finished_sending=finished_sending,
-            finished_recving=finished_recving,
+            kv_connector_output=kv_connector_output,
             num_nans_in_logits=num_nans_in_logits,
         )
 
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index 4bc4ece9a0df4..7fca245c1bef8 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -16,8 +16,7 @@ from vllm.config import VllmConfig
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment,
                               set_custom_all_reduce)
-from vllm.distributed.kv_transfer import (ensure_kv_transfer_initialized,
-                                          has_kv_transfer_group)
+from vllm.distributed.kv_transfer import ensure_kv_transfer_initialized
 from vllm.distributed.parallel_state import get_pp_group, get_tp_group
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
@@ -369,17 +368,20 @@ class Worker(WorkerBase):
             assert isinstance(output, IntermediateTensors)
             get_pp_group().send_tensor_dict(output.tensors,
                                             all_gather_group=get_tp_group())
-            if not has_kv_transfer_group():
+
+            kv_connector_output = output.kv_connector_output
+            if not kv_connector_output:
                 return None
 
             # In case of PP with kv transfer, we need to pass through the
-            # finished_sending and finished_recving buffers.
-            new_output = EMPTY_MODEL_RUNNER_OUTPUT
-            if output.finished_sending or output.finished_recving:
-                new_output = copy.copy(new_output)
-                new_output.finished_sending = output.finished_sending
-                new_output.finished_recving = output.finished_recving
-            output = new_output
+            # kv_connector_output
+            if (not kv_connector_output.finished_sending
+                    and not kv_connector_output.finished_recving):
+                return EMPTY_MODEL_RUNNER_OUTPUT
+
+            output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
+            output.kv_connector_output = kv_connector_output
+            return output
 
         assert isinstance(output, ModelRunnerOutput)
         return output
diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py
index 5a3186058fcfe..343befe176797 100644
--- a/vllm/v1/worker/kv_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py
@@ -4,6 +4,8 @@
 Define KV connector functionality mixin for model runners.
 """
 import copy
+from contextlib import AbstractContextManager, contextmanager, nullcontext
+from typing import Generator  # noqa: UP035
 from typing import TYPE_CHECKING, Optional
 
 from vllm.config import VllmConfig
@@ -12,7 +14,8 @@ from vllm.distributed.kv_transfer import (get_kv_transfer_group,
 from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorBase_V1
 from vllm.forward_context import get_forward_context, set_forward_context
 from vllm.logger import init_logger
-from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, ModelRunnerOutput
+from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput,
+                             ModelRunnerOutput)
 
 if TYPE_CHECKING:
     from vllm.v1.core.sched.output import SchedulerOutput
@@ -53,18 +56,60 @@ class KVConnectorModelRunnerMixin:
                 scheduler_output.finished_req_ids)
         return None, None
 
-    def kv_connector_no_forward(self, scheduler_output: "SchedulerOutput",
+    @staticmethod
+    def kv_connector_no_forward(scheduler_output: "SchedulerOutput",
                                 vllm_config: VllmConfig) -> ModelRunnerOutput:
         # KV send/recv even if no work to do.
-        with set_forward_context(None, vllm_config):
-            self.maybe_setup_kv_connector(scheduler_output)
-            finished_sending, finished_recving = (
-                self.get_finished_kv_transfers(scheduler_output))
+        with set_forward_context(
+                None, vllm_config
+        ), KVConnectorModelRunnerMixin._get_kv_connector_output(
+                scheduler_output, wait_for_save=False) as kv_connector_output:
+            pass
 
-        if not finished_sending and not finished_recving:
+        if (not kv_connector_output.finished_sending
+                and not kv_connector_output.finished_recving):
             return EMPTY_MODEL_RUNNER_OUTPUT
 
         output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
-        output.finished_sending = finished_sending
-        output.finished_recving = finished_recving
+        output.kv_connector_output = kv_connector_output
         return output
+
+    @staticmethod
+    def maybe_get_kv_connector_output(
+        scheduler_output: "SchedulerOutput"
+    ) -> AbstractContextManager[Optional[KVConnectorOutput]]:
+        return KVConnectorModelRunnerMixin._get_kv_connector_output(
+            scheduler_output) if has_kv_transfer_group() else nullcontext()
+
+    # This context manager must be used within an active forward context.
+    # It encapsulates the entire KV connector lifecycle within execute_model.
+    @staticmethod
+    @contextmanager
+    def _get_kv_connector_output(
+        scheduler_output: "SchedulerOutput",
+        wait_for_save: bool = True
+    ) -> Generator[KVConnectorOutput, None, None]:
+        output = KVConnectorOutput()
+
+        # Bind the scheduler's KVConnector metadata before the forward pass.
+        kv_connector = get_kv_transfer_group()
+        assert isinstance(kv_connector, KVConnectorBase_V1)
+        assert scheduler_output.kv_connector_metadata is not None
+        kv_connector.bind_connector_metadata(
+            scheduler_output.kv_connector_metadata)
+
+        # Background KV cache transfers happen here.
+        # These transfers are designed to be async and the requests
+        # involved may be disjoint from the running requests.
+        # Do this here to save a collective_rpc.
+        kv_connector.start_load_kv(get_forward_context())
+        try:
+            yield output
+        finally:
+            if wait_for_save:
+                kv_connector.wait_for_save()
+
+            output.finished_sending, output.finished_recving = (
+                kv_connector.get_finished(scheduler_output.finished_req_ids))
+
+            kv_connector.clear_connector_metadata()
diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py
index 59cbb0150570b..67cb2f9dd810e 100644
--- a/vllm/v1/worker/tpu_model_runner.py
+++ b/vllm/v1/worker/tpu_model_runner.py
@@ -51,7 +51,7 @@ from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, LogprobsLists,
 from vllm.v1.sample.tpu.metadata import TPUSupportedSamplingMetadata
 from vllm.v1.sample.tpu.sampler import Sampler as TPUSampler
 from vllm.v1.worker.kv_connector_model_runner_mixin import (
-    KVConnectorModelRunnerMixin)
+    KVConnectorModelRunnerMixin, KVConnectorOutput)
 from vllm.v1.worker.lora_model_runner_mixin import LoRAModelRunnerMixin
 from vllm.v1.worker.tpu_input_batch import CachedRequestState, InputBatch
 
@@ -1175,9 +1175,10 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             logprobs=logprobs_lists,
             prompt_logprobs_dict=prompt_logprobs_dict,
             pooler_output=[],
-            finished_sending=finished_sending,
-            finished_recving=finished_recving,
-        )
+            kv_connector_output=KVConnectorOutput(
+                finished_sending=finished_sending,
+                finished_recving=finished_recving,
+            ))
 
         # Check there are no new graphs compiled - all the graphs should be
         # captured and compiled during warm up.

From 6d98843b31fb6d12fa682fecf584a5b7a4e98491 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Sun, 3 Aug 2025 04:04:21 -0700
Subject: [PATCH 175/224] [Responses API] Disable response store by default
 (#22137)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
---
 .../entrypoints/openai/responses/conftest.py  | 12 ++++++---
 .../openai/responses/test_image.py            |  7 ++++--
 vllm/entrypoints/openai/serving_responses.py  | 25 ++++++++++++++++---
 vllm/envs.py                                  | 12 +++++++++
 4 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/responses/conftest.py
index 2dcdda04ecb57..2d677a00b646a 100644
--- a/tests/v1/entrypoints/openai/responses/conftest.py
+++ b/tests/v1/entrypoints/openai/responses/conftest.py
@@ -21,12 +21,16 @@ def default_server_args():
 
 
 @pytest.fixture(scope="module")
-def server(default_server_args):
-    with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
+def server_with_store(default_server_args):
+    with RemoteOpenAIServer(
+            MODEL_NAME,
+            default_server_args,
+            env_dict={"VLLM_ENABLE_RESPONSES_API_STORE": "1"},
+    ) as remote_server:
         yield remote_server
 
 
 @pytest_asyncio.fixture
-async def client(server):
-    async with server.get_async_client() as async_client:
+async def client(server_with_store):
+    async with server_with_store.get_async_client() as async_client:
         yield async_client
diff --git a/tests/v1/entrypoints/openai/responses/test_image.py b/tests/v1/entrypoints/openai/responses/test_image.py
index f3bce91e97cdf..c8d09fd39fb13 100644
--- a/tests/v1/entrypoints/openai/responses/test_image.py
+++ b/tests/v1/entrypoints/openai/responses/test_image.py
@@ -37,8 +37,11 @@ def default_image_server_args():
 
 @pytest.fixture(scope="module")
 def image_server(default_image_server_args):
-    with RemoteOpenAIServer(MODEL_NAME,
-                            default_image_server_args) as remote_server:
+    with RemoteOpenAIServer(
+            MODEL_NAME,
+            default_image_server_args,
+            env_dict={"VLLM_ENABLE_RESPONSES_API_STORE": "1"},
+    ) as remote_server:
         yield remote_server
 
 
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 64880a3a5377f..5e9401cbd7473 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -11,6 +11,7 @@ import jinja2
 from fastapi import Request
 from openai.types.responses import ResponseOutputMessage, ResponseOutputText
 
+from vllm import envs
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import (ChatCompletionMessageParam,
@@ -89,15 +90,17 @@ class OpenAIServingResponses(OpenAIServing):
             logger.info("Using default chat sampling params from %s: %s",
                         source, self.default_sampling_params)
 
+        # False by default.
+        self.enable_store = envs.VLLM_ENABLE_RESPONSES_API_STORE
         # HACK(woosuk): This is a hack. We should use a better store.
-        # FIXME: This causes a memory leak since we never remove responses
-        # from the store.
+        # FIXME: If enable_store=True, this may cause a memory leak since we
+        # never remove responses from the store.
         self.response_store: dict[str, ResponsesResponse] = {}
         self.response_store_lock = asyncio.Lock()
 
         # HACK(woosuk): This is a hack. We should use a better store.
-        # FIXME: This causes a memory leak since we never remove messages
-        # from the store.
+        # FIXME: If enable_store=True, this may cause a memory leak since we
+        # never remove messages from the store.
         self.msg_store: dict[str, list[ChatCompletionMessageParam]] = {}
 
         self.background_tasks: dict[str, asyncio.Task] = {}
@@ -118,6 +121,10 @@ class OpenAIServingResponses(OpenAIServing):
         if self.engine_client.errored:
             raise self.engine_client.dead_error
 
+        # Reject requests that ask for storage when the store is disabled.
+        if request.store and not self.enable_store:
+            return self._make_store_not_supported_error()
+
         # Handle the previous response ID.
         prev_response_id = request.previous_response_id
         if prev_response_id is not None:
@@ -456,3 +463,13 @@ class OpenAIServingResponses(OpenAIServing):
             message=f"Response with id '{response_id}' not found.",
             status_code=HTTPStatus.NOT_FOUND,
         )
+
+    def _make_store_not_supported_error(self) -> ErrorResponse:
+        return self.create_error_response(
+            err_type="invalid_request_error",
+            message=("`store=True` (default) is not supported. Please set "
+                     "`store=False` in Responses API or set "
+                     "`VLLM_ENABLE_RESPONSES_API_STORE=1` in the env var when "
+                     "starting the vLLM server."),
+            status_code=HTTPStatus.BAD_REQUEST,
+        )
diff --git a/vllm/envs.py b/vllm/envs.py
index 2d470c6dccbfd..8d3c7eab471cf 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -151,6 +151,7 @@ if TYPE_CHECKING:
     VLLM_ENABLE_CUDAGRAPH_GC: bool = False
     VLLM_LOOPBACK_IP: str = ""
     VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE: bool = False
+    VLLM_ENABLE_RESPONSES_API_STORE: bool = False
 
 
 def get_default_cache_root():
@@ -1056,6 +1057,17 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE":
     lambda: bool(int(os.getenv(\
             "VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE", "0"))),
+
+    # Enables support for the "store" option in the OpenAI Responses API.
+    # When set to 1, vLLM's OpenAI server will retain the input and output
+    # messages for those requests in memory. By default, this is disabled (0).
+    # NOTE/WARNING:
+    # 1. Messages are kept in memory only (not persisted to disk) and will be
+    #    lost when the vLLM server shuts down.
+    # 2. Enabling this option will cause a memory leak, as stored messages are
+    #    never removed from memory until the server terminates.
+    "VLLM_ENABLE_RESPONSES_API_STORE":
+    lambda: bool(int(os.getenv("VLLM_ENABLE_RESPONSES_API_STORE", "0"))),
 }
 
 # --8<-- [end:env-vars-definition]

From b5dfb94fa013d4488e6678ae2b0cd08576a12326 Mon Sep 17 00:00:00 2001
From: "Li, Jiang" <jiang1.li@intel.com>
Date: Sun, 3 Aug 2025 20:34:04 +0800
Subject: [PATCH 176/224] [CI/Build][Bugfix] Fix Qwen2.5 tests in CPU CI via
 fallback silu_and_mul to torch native implementation (#22145)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
---
 vllm/model_executor/layers/activation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py
index 1fd96fe405b9a..7ce44174ead6d 100644
--- a/vllm/model_executor/layers/activation.py
+++ b/vllm/model_executor/layers/activation.py
@@ -65,11 +65,13 @@ class SiluAndMul(CustomOp):
 
     def __init__(self):
         super().__init__()
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if current_platform.is_cuda_alike():
             self.op = torch.ops._C.silu_and_mul
         elif current_platform.is_xpu():
             from vllm._ipex_ops import ipex_ops
             self.op = ipex_ops.silu_and_mul
+        elif current_platform.is_cpu():
+            self._forward_method = self.forward_native
 
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         """PyTorch-native implementation equivalent to forward()."""

From 83f7bbb3180fe7503cfbc4fb49b06200fb64cdf0 Mon Sep 17 00:00:00 2001
From: TankNee <nee@tanknee.cn>
Date: Sun, 3 Aug 2025 22:47:55 +0800
Subject: [PATCH 177/224] Add chat doc in quick start (#21213)

Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
---
 docs/getting_started/quickstart.md | 37 ++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index 3a93497fab137..f833807666460 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -98,6 +98,43 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
+!!! note
+    The `llm.generate` method does not automatically apply the model's chat template to the input prompt. Therefore, if you are using an Instruct model or Chat model, you should manually apply the corresponding chat template to ensure the expected behavior. Alternatively, you can use the `llm.chat` method and pass a list of messages which have the same format as those passed to OpenAI's `client.chat.completions`:
+
+    ??? code
+    
+        ```python
+        # Using tokenizer to apply chat template
+        from transformers import AutoTokenizer
+    
+        tokenizer = AutoTokenizer.from_pretrained("/path/to/chat_model")
+        messages_list = [
+            [{"role": "user", "content": prompt}]
+            for prompt in prompts
+        ]
+        texts = tokenizer.apply_chat_template(
+            messages_list,
+            tokenize=False,
+            add_generation_prompt=True,
+        )
+        
+        # Generate outputs
+        outputs = llm.generate(texts, sampling_params)
+        
+        # Print the outputs.
+        for output in outputs:
+            prompt = output.prompt
+            generated_text = output.outputs[0].text
+            print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    
+        # Using chat interface.
+        outputs = llm.chat(messages_list, sampling_params)
+        for idx, output in enumerate(outputs):
+            prompt = prompts[idx]
+            generated_text = output.outputs[0].text
+            print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+        ```
+
 [](){ #quickstart-online }
 
 ## OpenAI-Compatible Server

From d3c18c9cb0b6c42eab4ed7251adbf68dde4da39a Mon Sep 17 00:00:00 2001
From: Yuxuan Zhang <2448370773@qq.com>
Date: Mon, 4 Aug 2025 00:04:54 +0800
Subject: [PATCH 178/224] fuse fp32 for GLM-4.5 e_score_correction_bias
 (#22143)

Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
---
 vllm/model_executor/models/glm4_moe.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py
index 6a196fef572de..c702684c6caa1 100644
--- a/vllm/model_executor/models/glm4_moe.py
+++ b/vllm/model_executor/models/glm4_moe.py
@@ -125,9 +125,8 @@ class Glm4MoE(nn.Module):
                                      quant_config=None,
                                      prefix=f"{prefix}.gate")
 
-        # noaux_tc is not set in transformers new config now
-        self.gate.e_score_correction_bias = (nn.Parameter(
-            torch.empty(config.n_routed_experts)))
+        self.gate.e_score_correction_bias = nn.Parameter(
+            torch.empty(config.n_routed_experts, dtype=torch.float32))
 
         # Load balancing settings.
         vllm_config = get_current_vllm_config()

From 6a39ba85fe0f2fff9494b5eccea717c93510c230 Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Mon, 4 Aug 2025 03:04:38 +0800
Subject: [PATCH 179/224] [Bugfix] Fix failing multimodal standard test
 (#22153)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
---
 tests/models/multimodal/test_tensor_schema.py |  2 ++
 tests/models/registry.py                      | 12 ++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/tests/models/multimodal/test_tensor_schema.py b/tests/models/multimodal/test_tensor_schema.py
index bdc62b1d2682d..f80e8456f02e3 100644
--- a/tests/models/multimodal/test_tensor_schema.py
+++ b/tests/models/multimodal/test_tensor_schema.py
@@ -105,6 +105,8 @@ def test_model_tensor_schema(model_arch: str, vllm_runner: type[VllmRunner],
 
     model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
     model_info.check_available_online(on_fail="skip")
+    model_info.check_transformers_version(on_fail="skip",
+                                          check_max_version=False)
 
     model_id = model_info.default
 
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 8fc870cf85642..25cfa267d1815 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -80,6 +80,8 @@ class _HfExamplesInfo:
         self,
         *,
         on_fail: Literal["error", "skip"],
+        check_min_version: bool = True,
+        check_max_version: bool = True,
     ) -> None:
         """
         If the installed transformers version does not meet the requirements,
@@ -96,9 +98,11 @@ class _HfExamplesInfo:
         msg = f"`transformers=={current_version}` installed, but `transformers"
         # Only check the base version for the min/max version, otherwise preview
         # models cannot be run because `x.yy.0.dev0`<`x.yy.0`
-        if min_version and Version(cur_base_version) < Version(min_version):
+        if (check_min_version and min_version
+                and Version(cur_base_version) < Version(min_version)):
             msg += f">={min_version}` is required to run this model."
-        elif max_version and Version(cur_base_version) > Version(max_version):
+        elif (check_max_version and max_version
+              and Version(cur_base_version) > Version(max_version)):
             msg += f"<={max_version}` is required to run this model."
         else:
             return
@@ -185,6 +189,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                           min_transformers_version="4.53"),
     "GlmForCausalLM": _HfExamplesInfo("THUDM/glm-4-9b-chat-hf"),
     "Glm4ForCausalLM": _HfExamplesInfo("THUDM/GLM-4-9B-0414"),
+    "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5",
+                                          min_transformers_version="4.54"),   # noqa: E501
     "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2",
                                        {"alias": "gpt2"}),
     "GPTBigCodeForCausalLM": _HfExamplesInfo("bigcode/starcoder",
@@ -378,8 +384,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
     "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking"),  # noqa: E501
-    "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5",
-                                          min_transformers_version="4.54"),   # noqa: E501
     "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
                                           is_available_online=False),   # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",

From 6f5478298ddd8e6aa330f171c70811f667b8699b Mon Sep 17 00:00:00 2001
From: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com>
Date: Sun, 3 Aug 2025 19:23:32 -0700
Subject: [PATCH 180/224] Use `aiohttp` connection pool for benchmarking
 (#21981)

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
---
 vllm/benchmarks/lib/endpoint_request_func.py | 469 +++++++++----------
 vllm/benchmarks/lib/ready_checker.py         |   4 +-
 vllm/benchmarks/serve.py                     |  40 +-
 3 files changed, 271 insertions(+), 242 deletions(-)

diff --git a/vllm/benchmarks/lib/endpoint_request_func.py b/vllm/benchmarks/lib/endpoint_request_func.py
index 60ae520db3862..2d64cc115f00f 100644
--- a/vllm/benchmarks/lib/endpoint_request_func.py
+++ b/vllm/benchmarks/lib/endpoint_request_func.py
@@ -50,6 +50,7 @@ class RequestFuncOutput:
 
 async def async_request_openai_completions(
     request_func_input: RequestFuncInput,
+    session: aiohttp.ClientSession,
     pbar: Optional[tqdm] = None,
 ) -> RequestFuncOutput:
     """The async request function for the OpenAI Completions API.
@@ -66,96 +67,94 @@ async def async_request_openai_completions(
         ("completions", "profile")
     ), "OpenAI Completions API URL must end with 'completions' or 'profile'."
 
-    async with aiohttp.ClientSession(trust_env=True,
-                                     timeout=AIOHTTP_TIMEOUT) as session:
-        payload = {
-            "model": request_func_input.model_name \
-                if request_func_input.model_name else request_func_input.model,
-            "prompt": request_func_input.prompt,
-            "temperature": 0.0,
-            "repetition_penalty": 1.0,
-            "max_tokens": request_func_input.output_len,
-            "logprobs": request_func_input.logprobs,
-            "stream": True,
-            "stream_options": {
-                "include_usage": True,
-            },
-        }
-        if request_func_input.ignore_eos:
-            payload["ignore_eos"] = request_func_input.ignore_eos
-        if request_func_input.extra_body:
-            payload.update(request_func_input.extra_body)
-        headers = {
-            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
-        }
+    payload = {
+        "model": request_func_input.model_name \
+            if request_func_input.model_name else request_func_input.model,
+        "prompt": request_func_input.prompt,
+        "temperature": 0.0,
+        "repetition_penalty": 1.0,
+        "max_tokens": request_func_input.output_len,
+        "logprobs": request_func_input.logprobs,
+        "stream": True,
+        "stream_options": {
+            "include_usage": True,
+        },
+    }
+    if request_func_input.ignore_eos:
+        payload["ignore_eos"] = request_func_input.ignore_eos
+    if request_func_input.extra_body:
+        payload.update(request_func_input.extra_body)
+    headers = {
+        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
+    }
 
-        output = RequestFuncOutput()
-        output.prompt_len = request_func_input.prompt_len
+    output = RequestFuncOutput()
+    output.prompt_len = request_func_input.prompt_len
 
-        generated_text = ""
-        st = time.perf_counter()
-        most_recent_timestamp = st
-        try:
-            async with session.post(url=api_url, json=payload,
-                                    headers=headers) as response:
-                if response.status == 200:
-                    first_chunk_received = False
-                    async for chunk_bytes in response.content:
-                        chunk_bytes = chunk_bytes.strip()
-                        if not chunk_bytes:
-                            continue
-                        chunk_bytes = chunk_bytes.decode("utf-8")
-                        # NOTE: SSE comments (often used as pings) start with
-                        # a colon. These are not JSON data payload and should
-                        # be skipped.
-                        if chunk_bytes.startswith(":"):
-                            continue
+    generated_text = ""
+    st = time.perf_counter()
+    most_recent_timestamp = st
+    try:
+        async with session.post(url=api_url, json=payload,
+                                headers=headers) as response:
+            if response.status == 200:
+                first_chunk_received = False
+                async for chunk_bytes in response.content:
+                    chunk_bytes = chunk_bytes.strip()
+                    if not chunk_bytes:
+                        continue
+                    chunk_bytes = chunk_bytes.decode("utf-8")
+                    # NOTE: SSE comments (often used as pings) start with
+                    # a colon. These are not JSON data payload and should
+                    # be skipped.
+                    if chunk_bytes.startswith(":"):
+                        continue
 
-                        chunk = chunk_bytes.removeprefix("data: ")
+                    chunk = chunk_bytes.removeprefix("data: ")
 
-                        if chunk != "[DONE]":
-                            data = json.loads(chunk)
+                    if chunk != "[DONE]":
+                        data = json.loads(chunk)
 
-                            # NOTE: Some completion API might have a last
-                            # usage summary response without a token so we
-                            # want to check a token was generated
-                            if choices := data.get("choices"):
-                                # Note that text could be empty here
-                                # e.g. for special tokens
-                                text = choices[0].get("text")
-                                timestamp = time.perf_counter()
-                                # First token
-                                if not first_chunk_received:
-                                    first_chunk_received = True
-                                    ttft = time.perf_counter() - st
-                                    output.ttft = ttft
+                        # NOTE: Some completion API might have a last
+                        # usage summary response without a token so we
+                        # want to check a token was generated
+                        if choices := data.get("choices"):
+                            # Note that text could be empty here
+                            # e.g. for special tokens
+                            text = choices[0].get("text")
+                            timestamp = time.perf_counter()
+                            # First token
+                            if not first_chunk_received:
+                                first_chunk_received = True
+                                ttft = time.perf_counter() - st
+                                output.ttft = ttft
 
-                                # Decoding phase
-                                else:
-                                    output.itl.append(timestamp -
-                                                      most_recent_timestamp)
+                            # Decoding phase
+                            else:
+                                output.itl.append(timestamp -
+                                                    most_recent_timestamp)
 
-                                most_recent_timestamp = timestamp
-                                generated_text += text or ""
-                            elif usage := data.get("usage"):
-                                output.output_tokens = usage.get(
-                                    "completion_tokens")
-                    if first_chunk_received:
-                        output.success = True
-                    else:
-                        output.success = False
-                        output.error = (
-                            "Never received a valid chunk to calculate TTFT."
-                            "This response will be marked as failed!")
-                    output.generated_text = generated_text
-                    output.latency = most_recent_timestamp - st
+                            most_recent_timestamp = timestamp
+                            generated_text += text or ""
+                        elif usage := data.get("usage"):
+                            output.output_tokens = usage.get(
+                                "completion_tokens")
+                if first_chunk_received:
+                    output.success = True
                 else:
-                    output.error = response.reason or ""
                     output.success = False
-        except Exception:
-            output.success = False
-            exc_info = sys.exc_info()
-            output.error = "".join(traceback.format_exception(*exc_info))
+                    output.error = (
+                        "Never received a valid chunk to calculate TTFT."
+                        "This response will be marked as failed!")
+                output.generated_text = generated_text
+                output.latency = most_recent_timestamp - st
+            else:
+                output.error = response.reason or ""
+                output.success = False
+    except Exception:
+        output.success = False
+        exc_info = sys.exc_info()
+        output.error = "".join(traceback.format_exception(*exc_info))
 
     if pbar:
         pbar.update(1)
@@ -164,45 +163,158 @@ async def async_request_openai_completions(
 
 async def async_request_openai_chat_completions(
     request_func_input: RequestFuncInput,
+    session: aiohttp.ClientSession,
     pbar: Optional[tqdm] = None,
 ) -> RequestFuncOutput:
     api_url = request_func_input.api_url
     assert api_url.endswith(("chat/completions", "profile")), (
         "OpenAI Chat Completions API URL must end with 'chat/completions'.")
 
-    async with aiohttp.ClientSession(trust_env=True,
-                                     timeout=AIOHTTP_TIMEOUT) as session:
-        content = [{"type": "text", "text": request_func_input.prompt}]
-        if request_func_input.multi_modal_content:
-            content.append(request_func_input.multi_modal_content)
-        payload = {
-            "model":
-            request_func_input.model_name
-            if request_func_input.model_name else request_func_input.model,
-            "messages": [
-                {
-                    "role": "user",
-                    "content": content
-                },
-            ],
-            "temperature":
-            0.0,
-            "max_completion_tokens":
-            request_func_input.output_len,
-            "stream":
-            True,
-            "stream_options": {
-                "include_usage": True,
+    content = [{"type": "text", "text": request_func_input.prompt}]
+    if request_func_input.multi_modal_content:
+        content.append(request_func_input.multi_modal_content)
+    payload = {
+        "model":
+        request_func_input.model_name
+        if request_func_input.model_name else request_func_input.model,
+        "messages": [
+            {
+                "role": "user",
+                "content": content
             },
-        }
-        if request_func_input.ignore_eos:
-            payload["ignore_eos"] = request_func_input.ignore_eos
-        if request_func_input.extra_body:
-            payload.update(request_func_input.extra_body)
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
-        }
+        ],
+        "temperature":
+        0.0,
+        "max_completion_tokens":
+        request_func_input.output_len,
+        "stream":
+        True,
+        "stream_options": {
+            "include_usage": True,
+        },
+    }
+    if request_func_input.ignore_eos:
+        payload["ignore_eos"] = request_func_input.ignore_eos
+    if request_func_input.extra_body:
+        payload.update(request_func_input.extra_body)
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
+    }
+
+    output = RequestFuncOutput()
+    output.prompt_len = request_func_input.prompt_len
+
+    generated_text = ""
+    ttft = 0.0
+    st = time.perf_counter()
+    most_recent_timestamp = st
+    try:
+        async with session.post(url=api_url, json=payload,
+                                headers=headers) as response:
+            if response.status == 200:
+                async for chunk_bytes in response.content:
+                    chunk_bytes = chunk_bytes.strip()
+                    if not chunk_bytes:
+                        continue
+                    chunk_bytes = chunk_bytes.decode("utf-8")
+                    # NOTE: SSE comments (often used as pings) start with
+                    # a colon. These are not JSON data payload and should
+                    # be skipped.
+                    if chunk_bytes.startswith(":"):
+                        continue
+
+                    chunk = chunk_bytes.removeprefix("data: ")
+
+                    if chunk != "[DONE]":
+                        timestamp = time.perf_counter()
+                        data = json.loads(chunk)
+
+                        if choices := data.get("choices"):
+                            content = choices[0]["delta"].get("content")
+                            # First token
+                            if ttft == 0.0:
+                                ttft = timestamp - st
+                                output.ttft = ttft
+
+                            # Decoding phase
+                            else:
+                                output.itl.append(timestamp -
+                                                    most_recent_timestamp)
+
+                            generated_text += content or ""
+                        elif usage := data.get("usage"):
+                            output.output_tokens = usage.get(
+                                "completion_tokens")
+
+                        most_recent_timestamp = timestamp
+
+                output.generated_text = generated_text
+                output.success = True
+                output.latency = most_recent_timestamp - st
+            else:
+                output.error = response.reason or ""
+                output.success = False
+    except Exception:
+        output.success = False
+        exc_info = sys.exc_info()
+        output.error = "".join(traceback.format_exception(*exc_info))
+
+    if pbar:
+        pbar.update(1)
+    return output
+
+
+async def async_request_openai_audio(
+    request_func_input: RequestFuncInput,
+    session: aiohttp.ClientSession,
+    pbar: Optional[tqdm] = None,
+) -> RequestFuncOutput:
+    # Lazy import without PlaceholderModule to avoid vllm dep.
+    import soundfile
+
+    api_url = request_func_input.api_url
+    assert api_url.endswith(("transcriptions", "translations")), (
+        "OpenAI Chat Completions API URL must end with 'transcriptions' ")
+    "or `translations`."
+
+    content = [{"type": "text", "text": request_func_input.prompt}]
+    payload = {
+        "model":
+        request_func_input.model_name
+        if request_func_input.model_name else request_func_input.model,
+        "temperature":
+        0.0,
+        "max_completion_tokens":
+        request_func_input.output_len,
+        "stream":
+        True,
+        "language":
+        "en",
+        # Flattened due to multipart/form-data
+        "stream_include_usage":
+        True,
+        "stream_continuous_usage_stats":
+        True,
+    }
+    if request_func_input.extra_body:
+        payload.update(request_func_input.extra_body)
+    headers = {
+        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
+    }
+
+    # Send audio file
+    def to_bytes(y, sr):
+        buffer = io.BytesIO()
+        soundfile.write(buffer, y, sr, format="WAV")
+        buffer.seek(0)
+        return buffer
+
+    with to_bytes(*request_func_input.multi_modal_content["audio"]) as f:
+        form = aiohttp.FormData()
+        form.add_field("file", f, content_type="audio/wav")
+        for key, value in payload.items():
+            form.add_field(key, str(value))
 
         output = RequestFuncOutput()
         output.prompt_len = request_func_input.prompt_len
@@ -212,28 +324,24 @@ async def async_request_openai_chat_completions(
         st = time.perf_counter()
         most_recent_timestamp = st
         try:
-            async with session.post(url=api_url, json=payload,
+            async with session.post(url=api_url,
+                                    data=form,
                                     headers=headers) as response:
                 if response.status == 200:
                     async for chunk_bytes in response.content:
                         chunk_bytes = chunk_bytes.strip()
                         if not chunk_bytes:
                             continue
-                        chunk_bytes = chunk_bytes.decode("utf-8")
-                        # NOTE: SSE comments (often used as pings) start with
-                        # a colon. These are not JSON data payload and should
-                        # be skipped.
-                        if chunk_bytes.startswith(":"):
-                            continue
-
-                        chunk = chunk_bytes.removeprefix("data: ")
 
+                        chunk = chunk_bytes.decode("utf-8").removeprefix(
+                            "data: ")
                         if chunk != "[DONE]":
                             timestamp = time.perf_counter()
                             data = json.loads(chunk)
 
                             if choices := data.get("choices"):
-                                content = choices[0]["delta"].get("content")
+                                content = choices[0]["delta"].get(
+                                    "content")
                                 # First token
                                 if ttft == 0.0:
                                     ttft = timestamp - st
@@ -241,8 +349,8 @@ async def async_request_openai_chat_completions(
 
                                 # Decoding phase
                                 else:
-                                    output.itl.append(timestamp -
-                                                      most_recent_timestamp)
+                                    output.itl.append(
+                                        timestamp - most_recent_timestamp)
 
                                 generated_text += content or ""
                             elif usage := data.get("usage"):
@@ -267,117 +375,6 @@ async def async_request_openai_chat_completions(
     return output
 
 
-async def async_request_openai_audio(
-    request_func_input: RequestFuncInput,
-    pbar: Optional[tqdm] = None,
-) -> RequestFuncOutput:
-    # Lazy import without PlaceholderModule to avoid vllm dep.
-    import soundfile
-
-    api_url = request_func_input.api_url
-    assert api_url.endswith(("transcriptions", "translations")), (
-        "OpenAI Chat Completions API URL must end with 'transcriptions' ")
-    "or `translations`."
-
-    async with aiohttp.ClientSession(trust_env=True,
-                                     timeout=AIOHTTP_TIMEOUT) as session:
-        content = [{"type": "text", "text": request_func_input.prompt}]
-        payload = {
-            "model":
-            request_func_input.model_name
-            if request_func_input.model_name else request_func_input.model,
-            "temperature":
-            0.0,
-            "max_completion_tokens":
-            request_func_input.output_len,
-            "stream":
-            True,
-            "language":
-            "en",
-            # Flattened due to multipart/form-data
-            "stream_include_usage":
-            True,
-            "stream_continuous_usage_stats":
-            True,
-        }
-        if request_func_input.extra_body:
-            payload.update(request_func_input.extra_body)
-        headers = {
-            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
-        }
-
-        # Send audio file
-        def to_bytes(y, sr):
-            buffer = io.BytesIO()
-            soundfile.write(buffer, y, sr, format="WAV")
-            buffer.seek(0)
-            return buffer
-
-        with to_bytes(*request_func_input.multi_modal_content["audio"]) as f:
-            form = aiohttp.FormData()
-            form.add_field("file", f, content_type="audio/wav")
-            for key, value in payload.items():
-                form.add_field(key, str(value))
-
-            output = RequestFuncOutput()
-            output.prompt_len = request_func_input.prompt_len
-
-            generated_text = ""
-            ttft = 0.0
-            st = time.perf_counter()
-            most_recent_timestamp = st
-            try:
-                async with session.post(url=api_url,
-                                        data=form,
-                                        headers=headers) as response:
-                    if response.status == 200:
-                        async for chunk_bytes in response.content:
-                            chunk_bytes = chunk_bytes.strip()
-                            if not chunk_bytes:
-                                continue
-
-                            chunk = chunk_bytes.decode("utf-8").removeprefix(
-                                "data: ")
-                            if chunk != "[DONE]":
-                                timestamp = time.perf_counter()
-                                data = json.loads(chunk)
-
-                                if choices := data.get("choices"):
-                                    content = choices[0]["delta"].get(
-                                        "content")
-                                    # First token
-                                    if ttft == 0.0:
-                                        ttft = timestamp - st
-                                        output.ttft = ttft
-
-                                    # Decoding phase
-                                    else:
-                                        output.itl.append(
-                                            timestamp - most_recent_timestamp)
-
-                                    generated_text += content or ""
-                                elif usage := data.get("usage"):
-                                    output.output_tokens = usage.get(
-                                        "completion_tokens")
-
-                                most_recent_timestamp = timestamp
-
-                        output.generated_text = generated_text
-                        output.success = True
-                        output.latency = most_recent_timestamp - st
-                    else:
-                        output.error = response.reason or ""
-                        output.success = False
-            except Exception:
-                output.success = False
-                exc_info = sys.exc_info()
-                output.error = "".join(traceback.format_exception(*exc_info))
-
-        if pbar:
-            pbar.update(1)
-        return output
-
-
 # TODO: Add more request functions for different API protocols.
 ASYNC_REQUEST_FUNCS = {
     "vllm": async_request_openai_completions,
diff --git a/vllm/benchmarks/lib/ready_checker.py b/vllm/benchmarks/lib/ready_checker.py
index a663f85b629d2..7e836158386a9 100644
--- a/vllm/benchmarks/lib/ready_checker.py
+++ b/vllm/benchmarks/lib/ready_checker.py
@@ -14,6 +14,7 @@ from .endpoint_request_func import RequestFuncInput, RequestFuncOutput
 async def wait_for_endpoint(
     request_func,
     test_input: RequestFuncInput,
+    session: aiohttp.ClientSession,
     timeout_seconds: int = 600,
     retry_interval: int = 5,
 ) -> RequestFuncOutput:
@@ -55,7 +56,8 @@ async def wait_for_endpoint(
 
             # ping the endpoint using request_func
             try:
-                output = await request_func(request_func_input=test_input)
+                output = await request_func(
+                    request_func_input=test_input, session=session)
                 if output.success:
                     pbar.close()
                     return output
diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py
index 45798547ac719..ca8d218581e77 100644
--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -28,6 +28,7 @@ from dataclasses import dataclass
 from datetime import datetime
 from typing import Any, Literal, Optional
 
+import aiohttp
 import numpy as np
 from tqdm.asyncio import tqdm
 from transformers import PreTrainedTokenizerBase
@@ -338,6 +339,24 @@ async def benchmark(
     else:
         raise ValueError(f"Unknown endpoint_type: {endpoint_type}")
 
+    # Reuses connections across requests to reduce TLS handshake overhead.
+    connector = aiohttp.TCPConnector(
+        limit=max_concurrency or 0,
+        limit_per_host=max_concurrency or 0,
+        ttl_dns_cache=300,
+        use_dns_cache=True,
+        keepalive_timeout=60,
+        enable_cleanup_closed=True,
+        force_close=False,
+        ssl=("https://" in api_url),
+    )
+
+    session = aiohttp.ClientSession(
+        connector=connector,
+        trust_env=True,
+        timeout=aiohttp.ClientTimeout(total=6 * 60 * 60),
+    )
+
     print("Starting initial single prompt test run...")
     test_prompt, test_prompt_len, test_output_len, test_mm_content = (
         input_requests[0].prompt,
@@ -361,7 +380,11 @@ async def benchmark(
     )
 
     test_output = await wait_for_endpoint(
-        request_func, test_input, timeout_seconds=ready_check_timeout_sec)
+        request_func,
+        test_input,
+        session,
+        timeout_seconds=ready_check_timeout_sec,
+    )
     if not test_output.success:
         raise ValueError(
             "Initial test run failed - Please make sure benchmark arguments "
@@ -386,7 +409,8 @@ async def benchmark(
                                          multi_modal_content=test_mm_content,
                                          ignore_eos=ignore_eos,
                                          extra_body=extra_body)
-        profile_output = await request_func(request_func_input=profile_input)
+        profile_output = await request_func(
+            request_func_input=profile_input, session=session)
         if profile_output.success:
             print("Profiler started")
 
@@ -412,12 +436,14 @@ async def benchmark(
     semaphore = (asyncio.Semaphore(max_concurrency)
                  if max_concurrency else None)
 
-    async def limited_request_func(request_func_input, pbar):
+    async def limited_request_func(request_func_input, session, pbar):
         if semaphore is None:
             return await request_func(request_func_input=request_func_input,
+                                      session=session,
                                       pbar=pbar)
         async with semaphore:
-            return await request_func(request_func_input=request_func_input,
+            return await request_func(request_func_input=request_func_input, 
+                                      session=session,
                                       pbar=pbar)
 
     benchmark_start_time = time.perf_counter()
@@ -469,6 +495,7 @@ async def benchmark(
         tasks.append(
             asyncio.create_task(
                 limited_request_func(request_func_input=request_func_input,
+                                     session=session,
                                      pbar=pbar)))
     outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
 
@@ -580,9 +607,12 @@ async def benchmark(
             output_len=test_output_len,
             logprobs=logprobs,
         )
-        profile_output = await request_func(request_func_input=profile_input)
+        profile_output = await request_func(
+            request_func_input=profile_input, session=session)
         if profile_output.success:
             print("Profiler stopped")
+
+    await session.close()
     return result
 
 

From e27d25a0dcbb71a0d2e2a27d7e2b606a8df30320 Mon Sep 17 00:00:00 2001
From: "ZiTian.Zhao" <zitian.zhao@tencentmusic.com>
Date: Mon, 4 Aug 2025 10:24:02 +0800
Subject: [PATCH 181/224] [fix] fix correct assertion syntax error in attention
 utils. (#22154)

Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com>
---
 vllm/v1/attention/backends/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index 6defd211f4cfa..48bd632227c5b 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -97,7 +97,9 @@ def _make_metadata_with_slice(
 
     query_start_loc = slice_query_start_locs(attn_metadata.query_start_loc,
                                              request_slice)
-    assert len(query_start_loc >= 2)
+    assert len(query_start_loc) >= 2, (
+        f"query_start_loc must have at least 2 elements, "
+        f"got {len(query_start_loc)}")
     query_start_loc_cpu = slice_query_start_locs(
         attn_metadata.query_start_loc_cpu, request_slice)
 

From 845420ac2c2bc27ae0f96c25430b4f1cd20063cc Mon Sep 17 00:00:00 2001
From: 22quinn <33176974+22quinn@users.noreply.github.com>
Date: Sun, 3 Aug 2025 19:43:33 -0700
Subject: [PATCH 182/224] [RLHF] Fix torch.dtype not serializable in example
 (#22158)

Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
---
 examples/offline_inference/rlhf.py       | 5 ++++-
 examples/offline_inference/rlhf_utils.py | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/examples/offline_inference/rlhf.py b/examples/offline_inference/rlhf.py
index 752117a4e3623..ed974b90b57ee 100644
--- a/examples/offline_inference/rlhf.py
+++ b/examples/offline_inference/rlhf.py
@@ -126,7 +126,10 @@ for name, p in train_model.named_parameters():
 
 # Synchronize the updated weights to the inference engine.
 for name, p in train_model.named_parameters():
-    handle = llm.collective_rpc.remote("update_weight", args=(name, p.dtype, p.shape))
+    dtype_name = str(p.dtype).split(".")[-1]
+    handle = llm.collective_rpc.remote(
+        "update_weight", args=(name, dtype_name, p.shape)
+    )
     model_update_group.broadcast(p, src=0, stream=torch.cuda.current_stream())
     ray.get(handle)
 
diff --git a/examples/offline_inference/rlhf_utils.py b/examples/offline_inference/rlhf_utils.py
index c445224d75686..d2a8419ffabcd 100644
--- a/examples/offline_inference/rlhf_utils.py
+++ b/examples/offline_inference/rlhf_utils.py
@@ -45,7 +45,8 @@ class WorkerExtension:
             self.device,
         )
 
-    def update_weight(self, name, dtype, shape):
+    def update_weight(self, name, dtype_name, shape):
+        dtype = getattr(torch, dtype_name)
         weight = torch.empty(shape, dtype=dtype, device="cuda")
         self.model_update_group.broadcast(
             weight, src=0, stream=torch.cuda.current_stream()

From 0d7db16a92afd9fc005ed0fba73356845586f5e7 Mon Sep 17 00:00:00 2001
From: Abirdcfly <fp544037857@gmail.com>
Date: Mon, 4 Aug 2025 10:57:03 +0800
Subject: [PATCH 183/224] [PD] add test for chat completions endpoint (#21925)

Signed-off-by: Abirdcfly <fp544037857@gmail.com>
---
 .../nixl_integration/test_disagg_accuracy.py  | 41 ++++++++++++-------
 .../nixl_integration/toy_proxy_server.py      |  2 +
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/tests/v1/kv_connector/nixl_integration/test_disagg_accuracy.py b/tests/v1/kv_connector/nixl_integration/test_disagg_accuracy.py
index 00e62f351ce30..697e101c35926 100644
--- a/tests/v1/kv_connector/nixl_integration/test_disagg_accuracy.py
+++ b/tests/v1/kv_connector/nixl_integration/test_disagg_accuracy.py
@@ -51,20 +51,31 @@ def check_vllm_server(url: str, timeout=5, retries=3) -> bool:
     return False
 
 
-def run_simple_prompt(base_url: str, model_name: str,
-                      input_prompt: str) -> str:
+def run_simple_prompt(base_url: str, model_name: str, input_prompt: str,
+                      use_chat_endpoint: bool) -> str:
     client = openai.OpenAI(api_key="EMPTY", base_url=base_url)
-    completion = client.completions.create(model=model_name,
-                                           prompt=input_prompt,
-                                           max_tokens=MAX_OUTPUT_LEN,
-                                           temperature=0.0,
-                                           seed=42)
+    if use_chat_endpoint:
+        completion = client.chat.completions.create(
+            model=model_name,
+            messages=[{
+                "role": "user",
+                "content": [{
+                    "type": "text",
+                    "text": input_prompt
+                }]
+            }],
+            max_completion_tokens=MAX_OUTPUT_LEN,
+            temperature=0.0,
+            seed=42)
+        return completion.choices[0].message.content
+    else:
+        completion = client.completions.create(model=model_name,
+                                               prompt=input_prompt,
+                                               max_tokens=MAX_OUTPUT_LEN,
+                                               temperature=0.0,
+                                               seed=42)
 
-    # print("-" * 50)
-    # print(f"Completion results for {model_name}:")
-    # print(completion)
-    # print("-" * 50)
-    return completion.choices[0].text
+        return completion.choices[0].text
 
 
 def main():
@@ -125,10 +136,12 @@ def main():
             f"vllm server: {args.service_url} is not ready yet!")
 
     output_strs = dict()
-    for prompt in SAMPLE_PROMPTS:
+    for i, prompt in enumerate(SAMPLE_PROMPTS):
+        use_chat_endpoint = (i % 2 == 1)
         output_str = run_simple_prompt(base_url=service_url,
                                        model_name=args.model_name,
-                                       input_prompt=prompt)
+                                       input_prompt=prompt,
+                                       use_chat_endpoint=use_chat_endpoint)
         print(f"Prompt: {prompt}, output: {output_str}")
         output_strs[prompt] = output_str
 
diff --git a/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py b/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
index 66e237da0f80a..905ae0ea71722 100644
--- a/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
+++ b/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
@@ -162,6 +162,8 @@ async def send_request_to_service(client_info: dict, endpoint: str,
     }
     req_data["stream"] = False
     req_data["max_tokens"] = 1
+    if "max_completion_tokens" in req_data:
+        req_data["max_completion_tokens"] = 1
     if "stream_options" in req_data:
         del req_data["stream_options"]
     headers = {

From c2e75b3c11047eec0f184577ce134879ce993f77 Mon Sep 17 00:00:00 2001
From: Ning Xie <andy.xning@gmail.com>
Date: Mon, 4 Aug 2025 11:03:58 +0800
Subject: [PATCH 184/224] remove duplicate code within
 cleanup_dist_env_and_memory (#22147)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
---
 vllm/distributed/parallel_state.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index ee581124db510..f31e4766bfdad 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -1238,8 +1238,6 @@ def destroy_distributed_environment():
 def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
     destroy_model_parallel()
     destroy_distributed_environment()
-    with contextlib.suppress(AssertionError):
-        torch.distributed.destroy_process_group()
     if shutdown_ray:
         import ray  # Lazy import Ray
         ray.shutdown()

From aa7012eb6db69baab57c80ac596d088eb81e090f Mon Sep 17 00:00:00 2001
From: Giancarlo Delfin <32987265+TheEpicDolphin@users.noreply.github.com>
Date: Sun, 3 Aug 2025 22:13:26 -0700
Subject: [PATCH 185/224] Add tree attention backend for v1 (part 1) (#20401)

Signed-off-by: Giancarlo Delfin <gdelfin@meta.com>
---
 tests/v1/attention/test_attention_backends.py |   2 +-
 tests/v1/attention/utils.py                   |   6 +-
 tests/v1/spec_decode/test_eagle.py            |   7 +-
 tests/v1/spec_decode/test_tree_attention.py   | 299 ++++++++++++
 .../attention/ops/triton_unified_attention.py |  48 ++
 vllm/config.py                                |  13 +
 vllm/engine/arg_utils.py                      |   2 +-
 vllm/platforms/cuda.py                        |   4 +
 vllm/platforms/interface.py                   |   1 +
 vllm/v1/attention/backends/tree_attn.py       | 452 ++++++++++++++++++
 vllm/v1/attention/backends/utils.py           |  20 +
 vllm/v1/spec_decode/eagle.py                  | 269 ++++++++++-
 12 files changed, 1098 insertions(+), 25 deletions(-)
 create mode 100644 tests/v1/spec_decode/test_tree_attention.py
 create mode 100644 vllm/v1/attention/backends/tree_attn.py

diff --git a/tests/v1/attention/test_attention_backends.py b/tests/v1/attention/test_attention_backends.py
index f197cbb7bbba0..ac08b9052cd80 100644
--- a/tests/v1/attention/test_attention_backends.py
+++ b/tests/v1/attention/test_attention_backends.py
@@ -17,7 +17,7 @@ from vllm.v1.kv_cache_interface import FullAttentionSpec
 
 BACKENDS_TO_TEST = [
     _Backend.FLASH_ATTN_VLLM_V1, _Backend.FLASHINFER_VLLM_V1,
-    _Backend.FLEX_ATTENTION, _Backend.TRITON_ATTN_VLLM_V1
+    _Backend.FLEX_ATTENTION, _Backend.TRITON_ATTN_VLLM_V1, _Backend.TREE_ATTN
 ]
 
 # Remove flashinfer from the list if it's not available
diff --git a/tests/v1/attention/utils.py b/tests/v1/attention/utils.py
index be6cfce6fba8a..78a6509986fcd 100644
--- a/tests/v1/attention/utils.py
+++ b/tests/v1/attention/utils.py
@@ -109,11 +109,11 @@ def create_common_attn_metadata(
 
 def get_attention_backend(backend_name: _Backend):
     """Set up attention backend classes for testing.
-    
+
     Args:
         backend_name: Name of the backend ("flash_attn", "flashinfer", etc.)
         vllm_config: VllmConfig instance
-        
+
     Returns:
         Tuple of (backend_builder_class, backend_impl_class)
     """
@@ -126,6 +126,8 @@ def get_attention_backend(backend_name: _Backend):
         "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend",
         _Backend.TRITON_ATTN_VLLM_V1:
         "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend",
+        _Backend.TREE_ATTN:
+        "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend",
     }
 
     if backend_name not in backend_map:
diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py
index a126c7c943ed0..05f6dd40a9ea9 100644
--- a/tests/v1/spec_decode/test_eagle.py
+++ b/tests/v1/spec_decode/test_eagle.py
@@ -202,7 +202,9 @@ def test_load_model(mock_get_model, mock_get_layers, mock_get_pp_group, method,
 
 
 @pytest.mark.parametrize("num_speculative_tokens", [1, 3, 8])
-def test_propose(num_speculative_tokens):
+@pytest.mark.parametrize("backend",
+                         [_Backend.FLASH_ATTN_VLLM_V1, _Backend.TREE_ATTN])
+def test_propose(num_speculative_tokens, backend):
     # Use GPU device
     device = torch.device(current_platform.device_type)
 
@@ -301,8 +303,7 @@ def test_propose(num_speculative_tokens):
                                    device=device)
     sampling_metadata = mock.MagicMock()
 
-    attn_metadata_builder_cls, _ = get_attention_backend(
-        _Backend.FLASH_ATTN_VLLM_V1)
+    attn_metadata_builder_cls, _ = get_attention_backend(backend)
     attn_metadata_builder = attn_metadata_builder_cls(
         kv_cache_spec=create_standard_kv_cache_spec(proposer.vllm_config),
         layer_names=proposer.attn_layer_names,
diff --git a/tests/v1/spec_decode/test_tree_attention.py b/tests/v1/spec_decode/test_tree_attention.py
new file mode 100644
index 0000000000000..42468daa62a9a
--- /dev/null
+++ b/tests/v1/spec_decode/test_tree_attention.py
@@ -0,0 +1,299 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import math
+from typing import Optional
+
+import torch
+
+from tests.v1.attention.utils import (_Backend, create_standard_kv_cache_spec,
+                                      create_vllm_config,
+                                      get_attention_backend)
+from vllm.config import ParallelConfig, SpeculativeConfig
+from vllm.v1.attention.backends.utils import CommonAttentionMetadata
+
+
+class MockAttentionLayer(torch.nn.Module):
+    _q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
+    _k_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
+    _v_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x):
+        return x
+
+
+def forward_attention(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    v: torch.Tensor,
+    kv_cache: torch.Tensor,
+    block_table: torch.Tensor,
+    slot_mapping: torch.Tensor,
+    seqlen_k: int,
+    backend: _Backend,
+    spec_token_tree: Optional[str] = None,
+    num_spec_tokens: int = 0,
+) -> torch.Tensor:
+    batch_size, q_len, num_heads, dim_per_head = q.shape
+    num_kv_heads = k.shape[-2]
+    # Initialize the query and KV sequence lengths.
+    query_start_loc = q_len * torch.arange(
+        batch_size + 1, device=q.device, dtype=torch.int32)
+    query_lens = torch.diff(query_start_loc)
+    seq_lens = torch.full(
+        (batch_size, ),
+        seqlen_k,
+        device=q.device,
+        dtype=torch.int32,
+    )
+    context_lens = seq_lens - query_lens
+    max_query_len = q_len
+    num_actual_tokens = query_start_loc[-1]
+
+    softmax_scale = q.shape[-1]**(-0.5)
+    layer = MockAttentionLayer()
+
+    # Build common metadata.
+    model_name = "meta-llama/Meta-Llama-3-8B"
+    builder_cls, impl_cls = get_attention_backend(backend)
+    vllm_config = create_vllm_config(model_name=model_name,
+                                     max_model_len=max(seq_lens))
+    if spec_token_tree is not None:
+        # Create speculative config if token tree is specified.
+        vllm_config.speculative_config = SpeculativeConfig(
+            target_model_config=vllm_config.model_config,
+            target_parallel_config=ParallelConfig(),
+            model=model_name,
+            method="eagle",
+            num_speculative_tokens=num_spec_tokens,
+            speculative_token_tree=spec_token_tree)
+    kv_cache_spec = create_standard_kv_cache_spec(vllm_config)
+    builder = builder_cls(kv_cache_spec, [], vllm_config, q.device)
+    common_attn_metadata = CommonAttentionMetadata(
+        query_start_loc=query_start_loc,
+        query_start_loc_cpu=query_start_loc.cpu(),
+        seq_lens=seq_lens,
+        seq_lens_cpu=seq_lens.cpu(),
+        num_computed_tokens_cpu=context_lens.cpu(),
+        num_reqs=batch_size,
+        num_actual_tokens=num_actual_tokens,
+        max_query_len=max_query_len,
+        block_table_tensor=block_table,
+        slot_mapping=slot_mapping,
+    )
+
+    # Build attention metadata.
+    attn_metadata = builder.build(
+        common_prefix_len=0,
+        common_attn_metadata=common_attn_metadata,
+    )
+
+    # Initialize the backend implementation.
+    instance = impl_cls(
+        num_heads=num_heads,
+        head_size=dim_per_head,
+        scale=softmax_scale,
+        num_kv_heads=num_kv_heads,
+        alibi_slopes=None,
+        sliding_window=None,
+        kv_cache_dtype="auto",
+    )
+
+    # Run forward pass and return output.
+    query = q.view(-1, num_heads, dim_per_head)
+    key = k.view(-1, num_kv_heads, dim_per_head)
+    value = v.view(-1, num_kv_heads, dim_per_head)
+    output = torch.empty_like(query)
+    return instance.forward(
+        layer=layer,
+        query=query,
+        key=key,
+        value=value,
+        kv_cache=kv_cache.clone(),
+        attn_metadata=attn_metadata,
+        output=output,
+    )
+
+
+def test_tree_attn_correctness() -> None:
+    torch.manual_seed(42)
+    torch.cuda.manual_seed_all(42)
+
+    device = "cuda"
+    tree_attn_masks = {
+        # Chain.
+        "[(0,), (0, 0), (0, 0, 0)]":
+        torch.tensor(
+            [
+                [1, 0, 0, 0],
+                [1, 1, 0, 0],
+                [1, 1, 1, 0],
+                [1, 1, 1, 1],
+            ],
+            device=device,
+            dtype=torch.int32,
+        ),
+        # Tree.
+        "[(0,), (1,), (0, 0), (0, 1), (1, 0), (1, 1)]":
+        torch.tensor(
+            [
+                [1, 0, 0, 0, 0, 0, 0],
+                [1, 1, 0, 0, 0, 0, 0],
+                [1, 0, 1, 0, 0, 0, 0],
+                [1, 1, 0, 1, 0, 0, 0],
+                [1, 1, 0, 0, 1, 0, 0],
+                [1, 0, 1, 0, 0, 1, 0],
+                [1, 0, 1, 0, 0, 0, 1],
+            ],
+            device=device,
+            dtype=torch.int32,
+        ),
+    }
+
+    dim_per_head = 128
+    num_kv_heads = 2
+    block_size = 128
+    max_sequence_length = 8192
+    randomize_blocks = True
+    for batch_size in [1, 16, 32]:
+        for num_heads in [2, 4]:
+            for sequence_position in [16, 1024, 2048]:
+                for spec_token_tree, tree_attn_mask in tree_attn_masks.items():
+                    # Assert that the number of heads is divisible
+                    # by the number of KV heads.
+                    assert num_heads % num_kv_heads == 0
+
+                    # Initialize q, k, and v.
+                    tree_size_q = tree_attn_mask.shape[0]
+                    seqlen_k = sequence_position + tree_size_q
+                    q = torch.randn(
+                        (batch_size, tree_size_q, num_heads, dim_per_head),
+                        device=device,
+                        dtype=torch.bfloat16,
+                    )
+                    k = torch.randn(
+                        (batch_size, tree_size_q, num_kv_heads, dim_per_head),
+                        device=device,
+                        dtype=torch.bfloat16,
+                    )
+                    v = torch.randn(
+                        (batch_size, tree_size_q, num_kv_heads, dim_per_head),
+                        device=device,
+                        dtype=torch.bfloat16,
+                    )
+
+                    # Setup the block table and KV cache for paged KV.
+                    assert max_sequence_length % block_size == 0
+                    max_blocks_per_batch = max_sequence_length // block_size
+                    kv_cache = torch.randn(
+                        (
+                            2,
+                            batch_size * max_blocks_per_batch,
+                            block_size,
+                            num_kv_heads,
+                            dim_per_head,
+                        ),
+                        device=q.device,
+                        dtype=torch.bfloat16,
+                    )
+                    num_alloc_blocks_per_batch = math.ceil(seqlen_k /
+                                                           block_size)
+                    block_table = torch.zeros(
+                        (batch_size, max_blocks_per_batch),
+                        device=q.device,
+                        dtype=torch.int32,
+                    )
+                    block_ids = torch.arange(
+                        0,
+                        batch_size * num_alloc_blocks_per_batch,
+                        device=q.device,
+                        dtype=torch.int32,
+                    )
+                    if randomize_blocks:
+                        # Randomize the block ids.
+                        block_ids = block_ids[torch.randperm(
+                            block_ids.numel())]
+                    block_table[:, :
+                                num_alloc_blocks_per_batch] = block_ids.view(
+                                    -1, num_alloc_blocks_per_batch)
+
+                    # Setup the slot mapping for the input KVs.
+                    tree_positions = sequence_position + torch.arange(
+                        0,
+                        tree_size_q,
+                        device=q.device,
+                        dtype=torch.int64,
+                    ).repeat(batch_size, 1)
+                    tree_slot_mapping = _gen_slot_mapping(
+                        tree_positions, block_table, block_size)
+
+                    # Compute attention for the tree.
+                    tree_attn_output = forward_attention(
+                        q=q,
+                        k=k,
+                        v=v,
+                        kv_cache=kv_cache,
+                        block_table=block_table,
+                        slot_mapping=tree_slot_mapping,
+                        seqlen_k=seqlen_k,
+                        backend=_Backend.TREE_ATTN,
+                        spec_token_tree=spec_token_tree,
+                        num_spec_tokens=tree_size_q - 1,
+                    ).view(batch_size, -1, num_heads, dim_per_head)
+
+                    # Verify that the chain attention output for each
+                    # branch of the tree (computed with the
+                    # FLASH_ATTN_VLLM_V1 backend) matches the tree output.
+                    for q_index in range(tree_size_q):
+                        # Get the q, k, and v for the branch.
+                        branch_mask = tree_attn_mask[q_index, :]
+                        branch_indices = torch.nonzero(branch_mask,
+                                                       as_tuple=True)[0]
+                        q_len = branch_indices.shape[0]
+                        q_branch = q[:, branch_indices]
+                        k_branch = k[:, branch_indices]
+                        v_branch = v[:, branch_indices]
+
+                        # Setup slot mapping for the branch.
+                        branch_positions = sequence_position + torch.arange(
+                            0,
+                            q_len,
+                            device=q.device,
+                            dtype=torch.int64,
+                        ).repeat(batch_size, 1)
+                        branch_slot_mapping = _gen_slot_mapping(
+                            branch_positions, block_table, block_size)
+
+                        # Compute flash attention for the branch.
+                        flash_attn_output = forward_attention(
+                            q=q_branch,
+                            k=k_branch,
+                            v=v_branch,
+                            kv_cache=kv_cache,
+                            block_table=block_table,
+                            slot_mapping=branch_slot_mapping,
+                            seqlen_k=sequence_position + q_len,
+                            backend=_Backend.FLASH_ATTN_VLLM_V1,
+                        ).view(batch_size, -1, num_heads, dim_per_head)
+
+                        # Compare the outputs.
+                        assert torch.allclose(
+                            tree_attn_output[:, branch_indices],
+                            flash_attn_output,
+                            atol=7.81e-3,
+                        ), (f"outputs are not close for "
+                            f"batch_size: {batch_size}, "
+                            f"num_heads: {num_heads}, "
+                            f"sequence_position: {sequence_position}, "
+                            f"tree_attn_mask: {tree_attn_mask}, "
+                            f"q_index: {q_index}.")
+
+
+def _gen_slot_mapping(positions: torch.Tensor, block_table: torch.Tensor,
+                      block_size: int):
+    block_indices = positions // block_size
+    blocks = block_table.gather(dim=1, index=block_indices)
+    return (blocks * block_size + positions % block_size).view(-1)
diff --git a/vllm/attention/ops/triton_unified_attention.py b/vllm/attention/ops/triton_unified_attention.py
index eb9c4f1c1030a..0fdba569f93f2 100644
--- a/vllm/attention/ops/triton_unified_attention.py
+++ b/vllm/attention/ops/triton_unified_attention.py
@@ -55,6 +55,7 @@ def kernel_unified_attention_2d(
         block_tables_ptr,  # [num_seqs, max_num_blocks_per_seq]
         seq_lens_ptr,  # [num_seqs]
         alibi_slopes_ptr,  # [num_query_heads]
+        qq_bias_ptr,  # [num_query_tokens, num_query_tokens]
         scale,  # float32
         k_scale,  # float32
         v_scale,  # float32
@@ -66,10 +67,12 @@ def kernel_unified_attention_2d(
         query_stride_1: tl.int64,  # int, should be equal to head_size
         output_stride_0: tl.int64,  # int
         output_stride_1: tl.int64,  # int, should be equal to head_size
+        qq_bias_stride_0: tl.int64,  # int
         BLOCK_SIZE: tl.constexpr,  # int
         HEAD_SIZE: tl.constexpr,  # int
         HEAD_SIZE_PADDED: tl.constexpr,  # int, must be power of 2
         USE_ALIBI_SLOPES: tl.constexpr,  # bool
+        USE_QQ_BIAS: tl.constexpr,  # bool
         USE_SOFTCAP: tl.constexpr,  # bool
         SLIDING_WINDOW: tl.constexpr,  # int
         stride_k_cache_0: tl.int64,  # int
@@ -144,6 +147,11 @@ def kernel_unified_attention_2d(
                               mask=query_mask_1,
                               other=0.0)
 
+    # query-query attention bias: per-query row base pointers into qq_bias
+    if USE_QQ_BIAS:
+        qq_bias_row_ptrs = (qq_bias_ptr + query_pos[:, None] * qq_bias_stride_0
+                            )  # shape: [BLOCK_M, 1]
+
     # compute the length of the longest sequence prefix spanned by any
     # query token in the current q_block (q_block_local_idx)
     max_seq_prefix_len = context_len + q_block_local_idx * BLOCK_Q + (
@@ -223,6 +231,18 @@ def kernel_unified_attention_2d(
         if USE_ALIBI_SLOPES:
             S += alibi_slope[:, None] * (seq_offset - context_len)
 
+        if USE_QQ_BIAS:
+            # compute key positions relative to query section
+            key_rel_pos = seq_offset - context_len  # shape: [BLOCK_SIZE]
+            # only query-section keys; row stride doubles as column bound
+            is_query_key = key_rel_pos >= 0 and key_rel_pos < qq_bias_stride_0
+            qq_bias = tl.load(
+                qq_bias_row_ptrs + key_rel_pos[None, :],
+                mask=is_query_key[None, :],  # avoid OOB for context keys
+                other=0.0,
+            )
+            S += qq_bias
+
         # compute running maximum
         # m_j : (BLOCK_M,)
         m_j = tl.maximum(M, tl.max(S, axis=1))
@@ -275,6 +295,7 @@ def kernel_unified_attention_3d(
         block_tables_ptr,  # [num_seqs, max_num_blocks_per_seq]
         seq_lens_ptr,  # [num_seqs]
         alibi_slopes_ptr,  # [num_query_heads]
+        qq_bias_ptr,  # [num_query_tokens, num_query_tokens]
         scale,  # float32
         k_scale,  # float32
         v_scale,  # float32
@@ -284,10 +305,12 @@ def kernel_unified_attention_3d(
         block_table_stride: tl.int64,  # int
         query_stride_0: tl.int64,  # int
         query_stride_1: tl.int64,  # int, should be equal to head_size
+        qq_bias_stride_0: tl.int64,  # int
         BLOCK_SIZE: tl.constexpr,  # int
         HEAD_SIZE: tl.constexpr,  # int
         HEAD_SIZE_PADDED: tl.constexpr,  # int, must be power of 2
         USE_ALIBI_SLOPES: tl.constexpr,  # bool
+        USE_QQ_BIAS: tl.constexpr,  # bool
         USE_SOFTCAP: tl.constexpr,  # bool
         SLIDING_WINDOW: tl.constexpr,  # int
         stride_k_cache_0: tl.int64,  # int
@@ -373,6 +396,11 @@ def kernel_unified_attention_3d(
                               mask=query_mask_1,
                               other=0.0)
 
+    # query-query attention bias: per-query row base pointers into qq_bias
+    if USE_QQ_BIAS:
+        qq_bias_row_ptrs = (qq_bias_ptr + query_pos[:, None] * qq_bias_stride_0
+                            )  # shape: [BLOCK_M, 1]
+
     num_blocks = cdiv_fn(seq_len, BLOCK_SIZE)
 
     # iterate through tiles within current segment
@@ -442,6 +470,18 @@ def kernel_unified_attention_3d(
         if USE_ALIBI_SLOPES:
             S += alibi_slope[:, None] * (seq_offset - context_len)
 
+        if USE_QQ_BIAS:
+            # compute key positions relative to query section
+            key_rel_pos = seq_offset - context_len  # shape: [BLOCK_SIZE]
+            # only query-section keys; row stride doubles as column bound
+            is_query_key = key_rel_pos >= 0 and key_rel_pos < qq_bias_stride_0
+            qq_bias = tl.load(
+                qq_bias_row_ptrs + key_rel_pos[None, :],
+                mask=is_query_key[None, :],  # avoid OOB for context keys
+                other=0.0,
+            )
+            S += qq_bias
+
         # compute running maximum
         # m_j : (BLOCK_M,)
         m_j = tl.maximum(M, tl.max(S, axis=1))
@@ -586,6 +626,7 @@ def unified_attention(
     k_descale,
     v_descale,
     alibi_slopes=None,
+    qq_bias=None,
 ):
     assert causal, "Only causal attention is supported"
     assert q_descale is None, "Q scales not supported"
@@ -595,6 +636,7 @@ def unified_attention(
         "Block size must be at least 32 for fp8"
 
     use_alibi_slopes = alibi_slopes is not None
+    use_qq_bias = qq_bias is not None
 
     block_size = v.shape[1]
     num_seqs = len(seqused_k)
@@ -630,6 +672,7 @@ def unified_attention(
             block_tables_ptr=block_table,
             seq_lens_ptr=seqused_k,
             alibi_slopes_ptr=alibi_slopes,
+            qq_bias_ptr=qq_bias,
             scale=softmax_scale,
             k_scale=k_descale,
             v_scale=v_descale,
@@ -641,10 +684,12 @@ def unified_attention(
             query_stride_1=q.stride(1),
             output_stride_0=out.stride(0),
             output_stride_1=out.stride(1),
+            qq_bias_stride_0=qq_bias.stride(0) if use_qq_bias else 0,
             BLOCK_SIZE=block_size,
             HEAD_SIZE=head_size,
             HEAD_SIZE_PADDED=triton.next_power_of_2(head_size),
             USE_ALIBI_SLOPES=use_alibi_slopes,
+            USE_QQ_BIAS=use_qq_bias,
             USE_SOFTCAP=(softcap > 0),
             SLIDING_WINDOW=(1 + window_size[0]),
             stride_k_cache_0=k.stride(0),
@@ -699,6 +744,7 @@ def unified_attention(
                 block_tables_ptr=block_table,
                 seq_lens_ptr=seqused_k,
                 alibi_slopes_ptr=alibi_slopes,
+                qq_bias_ptr=qq_bias,
                 scale=softmax_scale,
                 k_scale=k_descale,
                 v_scale=v_descale,
@@ -708,10 +754,12 @@ def unified_attention(
                 block_table_stride=block_table.stride(0),
                 query_stride_0=q.stride(0),
                 query_stride_1=q.stride(1),
+                qq_bias_stride_0=qq_bias.stride(0) if use_qq_bias else 0,
                 BLOCK_SIZE=block_size,
                 HEAD_SIZE=head_size,
                 HEAD_SIZE_PADDED=triton.next_power_of_2(head_size),
                 USE_ALIBI_SLOPES=use_alibi_slopes,
+                USE_QQ_BIAS=use_qq_bias,
                 USE_SOFTCAP=(softcap > 0),
                 SLIDING_WINDOW=(1 + window_size[0]),
                 stride_k_cache_0=k.stride(0),
diff --git a/vllm/config.py b/vllm/config.py
index ee8f3dd98dd86..871df455ef58f 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -3049,6 +3049,19 @@ class SpeculativeConfig:
                             f"num_speculative_tokens:{self.num_speculative_tokens}"
                             f" must be divisible by {n_predict=}")
 
+                if self.speculative_token_tree is None:
+                    # Generate chain of tokens.
+                    self.speculative_token_tree = str([
+                        (i + 1) * (0, )
+                        for i in range(self.num_speculative_tokens)
+                    ])
+                else:
+                    # Sort the token tree breadth-first.
+                    tree_choices = ast.literal_eval(
+                        self.speculative_token_tree)
+                    self.speculative_token_tree = str(
+                        sorted(tree_choices, key=lambda t: (len(t), t)))
+
                 self.draft_tensor_parallel_size = \
                     SpeculativeConfig._verify_and_get_draft_tp(
                         self.target_parallel_config,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index c94e440e5c845..5eb9660cd1e8c 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1454,7 +1454,6 @@ class EngineArgs:
                 "Please consider using other speculative decoding methods "
                 "such as ngram, medusa, eagle, or deepseek_mtp.")
 
-        # No XFormers so far.
         V1_BACKENDS = [
             "FLASH_ATTN_VLLM_V1",
             "FLASH_ATTN",
@@ -1469,6 +1468,7 @@ class EngineArgs:
             "ROCM_AITER_MLA",
             "TORCH_SDPA_VLLM_V1",
             "FLEX_ATTENTION",
+            "TREE_ATTN",
         ]
         if (envs.is_set("VLLM_ATTENTION_BACKEND")
                 and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index a90910639f784..b61b39a9274d0 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -270,6 +270,7 @@ class CudaPlatformBase(Platform):
             FLEX_ATTENTION_V1 = "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend"  # noqa: E501
             TRITON_ATTN_VLLM_V1 = "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend"  # noqa: E501
             FLASH_ATTN_V1 = "vllm.v1.attention.backends.flash_attn.FlashAttentionBackend"  # noqa: E501
+            TREE_ATTN_V1 = "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend"  # noqa: E501
 
             if selected_backend == _Backend.FLASHINFER:
                 logger.info_once("Using FlashInfer backend on V1 engine.")
@@ -287,6 +288,9 @@ class CudaPlatformBase(Platform):
             elif selected_backend == _Backend.FLASH_ATTN:
                 logger.info_once("Using Flash Attention backend on V1 engine.")
                 return FLASH_ATTN_V1
+            elif selected_backend == _Backend.TREE_ATTN:
+                logger.info_once("Using Tree Attention backend on V1 engine.")
+                return TREE_ATTN_V1
 
             from vllm.attention.selector import is_attn_backend_supported
 
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 997aee7063f57..61ce868c13b47 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -62,6 +62,7 @@ class _Backend(enum.Enum):
     DIFFERENTIAL_FLASH_ATTN = enum.auto()
     NO_ATTENTION = enum.auto()
     FLEX_ATTENTION = enum.auto()
+    TREE_ATTN = enum.auto()
 
 
 class PlatformEnum(enum.Enum):
diff --git a/vllm/v1/attention/backends/tree_attn.py b/vllm/v1/attention/backends/tree_attn.py
new file mode 100644
index 0000000000000..4fb7483284053
--- /dev/null
+++ b/vllm/v1/attention/backends/tree_attn.py
@@ -0,0 +1,452 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Attention layer with TreeAttention."""
+
+import ast
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, Optional
+
+import torch
+
+from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
+                                              AttentionMetadata, AttentionType)
+from vllm.attention.ops.triton_unified_attention import unified_attention
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+from vllm.v1.attention.backends.utils import (
+    AttentionMetadataBuilder, CommonAttentionMetadata,
+    reorder_batch_to_split_decodes_and_prefills, split_decodes_and_prefills)
+from vllm.v1.kv_cache_interface import AttentionSpec
+
+if TYPE_CHECKING:
+    from vllm.v1.core.sched.output import SchedulerOutput
+    from vllm.v1.worker.gpu_input_batch import InputBatch
+
+from vllm import _custom_ops as ops
+
+logger = init_logger(__name__)
+
+
+class TreeAttentionBackend(AttentionBackend):
+    """Backend entry point that wires up the tree attention classes."""
+
+    accept_output_buffer: bool = True
+
+    @classmethod
+    def get_supported_dtypes(cls) -> list[torch.dtype]:
+        return [torch.float16, torch.bfloat16]
+
+    @classmethod
+    def get_supported_head_sizes(cls) -> list[int]:
+        return [32, 64, 96, 128, 160, 192, 224, 256]
+
+    @classmethod
+    def validate_head_size(cls, head_size: int) -> None:
+        """Raise ValueError unless ``head_size`` is a supported head size."""
+        if head_size not in (supported := cls.get_supported_head_sizes()):
+            raise ValueError(
+                f"Head size {head_size} is not supported by "
+                f"{cls.__name__.removesuffix('Backend')}. "
+                f"Supported head sizes are: {supported}. "
+                "Set VLLM_ATTENTION_BACKEND=FLEX_ATTENTION to use "
+                "FlexAttention backend which supports all head sizes.")
+
+    @staticmethod
+    def get_name() -> str:
+        return "TREE_ATTN_VLLM_V1"
+
+    @staticmethod
+    def get_impl_cls() -> type["TreeAttentionImpl"]:
+        return TreeAttentionImpl
+
+    @staticmethod
+    def get_metadata_cls() -> type["AttentionMetadata"]:
+        return TreeAttentionMetadata
+
+    @staticmethod
+    def get_builder_cls() -> type["TreeAttentionMetadataBuilder"]:
+        return TreeAttentionMetadataBuilder
+
+    @staticmethod
+    def get_kv_cache_shape(num_blocks: int, block_size: int, num_kv_heads: int,
+                           head_size: int) -> tuple[int, ...]:
+        """Return the KV cache shape; block_size must be a multiple of 16."""
+        if block_size % 16:
+            raise ValueError("Block size must be a multiple of 16.")
+        # The leading dimension of 2 separates the key and value caches.
+        return (2, num_blocks, block_size, num_kv_heads, head_size)
+
+    @staticmethod
+    def use_cascade_attention(*args, **kwargs) -> bool:
+        """Cascade attention is never used with this backend."""
+        return False
+
+
+@dataclass
+class TreeAttentionMetadata:
+    """Attention metadata for the tree attention backend.
+
+    Decode requests come first in the batch, prefill requests after them.
+    """
+
+    num_actual_tokens: int  # Number of tokens excluding padding.
+    max_query_len: int
+    query_start_loc: torch.Tensor
+    max_seq_len: int
+    seq_lens: torch.Tensor
+    block_table: torch.Tensor
+    slot_mapping: torch.Tensor
+
+    num_prefill_tokens: int = 0
+    num_decode_tokens: int = 0
+    num_prefills: int = 0
+    num_decodes: int = 0
+
+    tree_attn_bias: Optional[torch.Tensor] = None
+
+    # Cached Prefill/decode metadata.
+    _cached_prefill_metadata: Optional["TreeAttentionMetadata"] = None
+    _cached_decode_metadata: Optional["TreeAttentionMetadata"] = None
+
+    @property
+    def prefill_metadata(self) -> Optional["TreeAttentionMetadata"]:
+        """Prefill-only view of this metadata, or None without prefills.
+
+        Built on first access and cached; it carries no tree bias.
+        """
+        if self.num_prefills == 0:
+            return None
+        if self._cached_prefill_metadata is None:
+            starts = self.query_start_loc[self.num_decodes:]
+            kv_lens = self.seq_lens[self.num_decodes:]
+            self._cached_prefill_metadata = TreeAttentionMetadata(
+                num_actual_tokens=self.num_prefill_tokens,
+                max_query_len=int(torch.diff(starts).max().item()),
+                # Rebase so that the first prefill starts at offset 0.
+                query_start_loc=starts - starts[0],
+                max_seq_len=int(kv_lens.max().item()),
+                seq_lens=kv_lens,
+                block_table=self.block_table[self.num_decodes:],
+                slot_mapping=self.slot_mapping[self.num_decode_tokens:],
+            )
+        return self._cached_prefill_metadata
+
+    @property
+    def decode_metadata(self) -> Optional["TreeAttentionMetadata"]:
+        """Decode-only view carrying the tree bias, or None without decodes.
+
+        Built on first access and cached.
+        """
+        if self.num_decode_tokens == 0:
+            return None
+        if self._cached_decode_metadata is None:
+            starts = self.query_start_loc[:self.num_decodes + 1]
+            kv_lens = self.seq_lens[:self.num_decodes]
+            self._cached_decode_metadata = TreeAttentionMetadata(
+                num_actual_tokens=self.num_decode_tokens,
+                max_query_len=int(torch.diff(starts).max().item()),
+                query_start_loc=starts,
+                max_seq_len=int(kv_lens.max().item()),
+                seq_lens=kv_lens,
+                block_table=self.block_table[:self.num_decodes],
+                slot_mapping=self.slot_mapping[:self.num_decode_tokens],
+                tree_attn_bias=self.tree_attn_bias,
+            )
+        return self._cached_decode_metadata
+
+
+class TreeAttentionMetadataBuilder(
+        AttentionMetadataBuilder[TreeAttentionMetadata]):
+    """Builds TreeAttentionMetadata and owns the tree attention bias."""
+
+    def __init__(
+        self,
+        kv_cache_spec: AttentionSpec,
+        layer_names: list[str],
+        vllm_config: VllmConfig,
+        device: torch.device,
+    ):
+        self.kv_cache_spec = kv_cache_spec
+        self.block_size = kv_cache_spec.block_size
+
+        # Without a configured token tree, fall back to a single draft node.
+        spec_config = vllm_config.speculative_config
+        spec_token_tree = spec_config and spec_config.speculative_token_tree
+        tree_choices: list[tuple[int, ...]] = [(0, )]
+        if spec_token_tree is not None:
+            tree_choices = ast.literal_eval(spec_token_tree)
+        # Construct the tree attention bias.
+        self.tree_attn_bias = _prepare_tree_attn_bias(
+            tree_choices,
+            _get_depth_counts(tree_choices),
+            dtype=torch.float32,
+            device=device,
+        )
+
+    def reorder_batch(self, input_batch: "InputBatch",
+                      scheduler_output: "SchedulerOutput") -> bool:
+        # The decode threshold is the number of tree nodes, root included.
+        return reorder_batch_to_split_decodes_and_prefills(
+            input_batch,
+            scheduler_output,
+            decode_threshold=self.tree_attn_bias.shape[0])
+
+    def build(
+        self,
+        common_prefix_len: int,
+        common_attn_metadata: CommonAttentionMetadata,
+        fast_build: bool = False,
+    ) -> TreeAttentionMetadata:
+        """Split the batch into decodes and prefills and package the metadata.
+
+        ``common_prefix_len`` and ``fast_build`` are accepted but not used.
+        """
+        # The size of the current bias is the decode/prefill query threshold.
+        num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = (
+            split_decodes_and_prefills(
+                common_attn_metadata,
+                decode_threshold=self.tree_attn_bias.shape[0]))
+
+        return TreeAttentionMetadata(
+            num_actual_tokens=common_attn_metadata.num_actual_tokens,
+            num_prefill_tokens=num_prefill_tokens,
+            num_decode_tokens=num_decode_tokens,
+            num_prefills=num_prefills,
+            num_decodes=num_decodes,
+            max_query_len=common_attn_metadata.max_query_len,
+            query_start_loc=common_attn_metadata.query_start_loc,
+            max_seq_len=int(common_attn_metadata.seq_lens_cpu.max()),
+            seq_lens=common_attn_metadata.seq_lens,
+            block_table=common_attn_metadata.block_table_tensor,
+            slot_mapping=common_attn_metadata.slot_mapping,
+            tree_attn_bias=self.tree_attn_bias,
+        )
+
+    def build_for_drafting(
+        self,
+        common_attn_metadata: CommonAttentionMetadata,
+        draft_index: int,
+    ) -> TreeAttentionMetadata:
+        """Build metadata for drafting with a level-specific attention bias.
+
+        ``draft_index`` 0 uses an empty bias; otherwise the bias is the
+        square window of ``max_query_len`` rows starting at ``draft_index``.
+        """
+        # Cache the original tree attention bias.
+        orig_tree_attn_bias = self.tree_attn_bias
+        try:
+            if draft_index == 0:
+                # Use prefill for drafting at the root level.
+                self.tree_attn_bias = torch.empty(0)
+            else:
+                # Slice the tree attention bias for drafting.
+                query_len = common_attn_metadata.max_query_len
+                start, end = draft_index, draft_index + query_len
+                self.tree_attn_bias = orig_tree_attn_bias[
+                    start:end, start:end].contiguous()
+            return self.build(0, common_attn_metadata, fast_build=True)
+        finally:
+            # Restore the full bias even if building the metadata fails.
+            self.tree_attn_bias = orig_tree_attn_bias
+
+
+def _get_depth_counts(sorted_tree_choices: list[tuple[int, ...]]) -> list[int]:
+    """Return the number of tree nodes at each depth; index 0 is depth 1.
+
+    Each path is a tuple of child indices whose length is the node's depth.
+    Counting by depth directly gives correct totals even if the paths are not
+    grouped by depth.
+    """
+    depth_counts = [0] * max(map(len, sorted_tree_choices), default=0)
+    for path in sorted_tree_choices:
+        depth_counts[len(path) - 1] += 1
+    return depth_counts
+
+
+def _prepare_tree_attn_bias(
+    sorted_tree_choices: list[tuple[int, ...]],
+    depth_counts: list[int],
+    dtype: Optional[torch.dtype],
+    device: Optional[torch.device],
+) -> torch.Tensor:
+    """Build the additive 0 / ``-inf`` attention bias for a draft-token tree.
+
+    Row/column 0 is the root and row ``i + 1`` is ``sorted_tree_choices[i]``.
+    A row is 0 at itself, at the root and at each ancestor (every proper
+    prefix of its path) and ``-inf`` elsewhere. Only the first
+    ``sum(depth_counts)`` paths are linked to their ancestors.
+
+    Raises:
+        ValueError: If an ancestor of a path is missing from the tree.
+    """
+    # +1 comes from the additional root node.
+    tree_len = len(sorted_tree_choices) + 1
+    tree_attn_mask = torch.full((tree_len, tree_len),
+                                -torch.inf,
+                                device=device,
+                                dtype=dtype)
+    # Each token attends to itself and to the root.
+    tree_attn_mask.fill_diagonal_(0)
+    tree_attn_mask[:, 0] = 0
+    # Row of each path's first occurrence, for O(1) ancestor lookups.
+    row_of: dict[tuple[int, ...], int] = {}
+    for row, path in enumerate(sorted_tree_choices, start=1):
+        row_of.setdefault(tuple(path), row)
+    for row in range(1, sum(depth_counts) + 1):
+        path = tuple(sorted_tree_choices[row - 1])
+        ancestors = [path[:end] for end in range(1, len(path))]
+        if missing := [a for a in ancestors if a not in row_of]:
+            raise ValueError(f"{missing[0]} is not in list")
+        if ancestors:
+            tree_attn_mask[row, [row_of[a] for a in ancestors]] = 0
+    return tree_attn_mask
+
+
+class TreeAttentionImpl(AttentionImpl):
+    """Attention implementation that applies a tree bias to decode queries."""
+
+    def __init__(
+        self,
+        num_heads: int,
+        head_size: int,
+        scale: float,
+        num_kv_heads: int,
+        alibi_slopes: Optional[list[float]],
+        sliding_window: Optional[int],
+        kv_cache_dtype: str,
+        blocksparse_params: Optional[dict[str, Any]] = None,
+        logits_soft_cap: Optional[float] = None,
+        attn_type: AttentionType = AttentionType.DECODER,
+        kv_sharing_target_layer_name: Optional[str] = None,
+        use_irope: bool = False,  # Accepted but not used by this class.
+    ) -> None:
+        if blocksparse_params is not None:
+            raise ValueError(
+                "TreeAttention does not support block-sparse attention.")
+        self.num_heads = num_heads
+        self.head_size = head_size
+        self.scale = float(scale)
+        self.num_kv_heads = num_kv_heads
+        self.num_queries_per_kv = self.num_heads // self.num_kv_heads
+        self.kv_cache_dtype = kv_cache_dtype
+        self.kv_sharing_target_layer_name = kv_sharing_target_layer_name
+        if alibi_slopes is not None:
+            alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32)
+        self.alibi_slopes = alibi_slopes
+        if logits_soft_cap is None:
+            # Setting logits_soft_cap to 0 means no soft cap.
+            logits_soft_cap = 0
+        self.logits_soft_cap = logits_soft_cap
+        if sliding_window is None:
+            self.sliding_window = (-1, -1)
+        else:
+            self.sliding_window = (sliding_window - 1, 0)
+
+        TreeAttentionBackend.validate_head_size(head_size)
+
+        if attn_type != AttentionType.DECODER:
+            raise NotImplementedError("Encoder self-attention and "
+                                      "encoder/decoder cross-attention "
+                                      "are not implemented for "
+                                      "TreeAttentionImpl.")
+
+    def forward(
+        self,
+        layer: torch.nn.Module,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        kv_cache: torch.Tensor,
+        attn_metadata: TreeAttentionMetadata,
+        output: Optional[torch.Tensor] = None,
+        output_scale: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        """Forward pass with TreeAttention.
+
+        Args:
+            layer: Module whose ``_k_scale`` / ``_v_scale`` tensors are used.
+            query: shape = [num_tokens, num_heads, head_size]
+            key, value: shape = [num_tokens, num_kv_heads, head_size]
+            kv_cache: [2, num_blocks, block_size, num_kv_heads, head_size]
+            attn_metadata: Metadata for attention; None in a profiling run.
+            output: Required result buffer. ``output_scale`` must be None.
+        Returns:
+            ``output``; shape = [num_tokens, num_heads * head_size]
+        """
+        assert output is not None, "Output tensor must be provided."
+
+        if output_scale is not None:
+            raise NotImplementedError(
+                "fused output quantization is not yet supported"
+                " for TreeAttentionImpl")
+
+        if attn_metadata is None:
+            # Profiling run.
+            return output
+
+        # Cache the input KVs.
+        key_cache, value_cache = kv_cache.unbind(0)
+        if self.kv_sharing_target_layer_name is None:
+            # Reshape the input keys and values and store them in the cache.
+            # Skip this if sharing KV cache with an earlier attention layer.
+            # NOTE(woosuk): key and value are padded while slot_mapping is not.
+            # Slicing them to [:num_actual_tokens] is unnecessary because the
+            # op takes the number of actual tokens from slot_mapping's shape.
+            ops.reshape_and_cache_flash(
+                key,
+                value,
+                key_cache,
+                value_cache,
+                attn_metadata.slot_mapping,
+                self.kv_cache_dtype,
+                layer._k_scale,
+                layer._v_scale,
+            )
+
+        num_actual_tokens = attn_metadata.num_actual_tokens
+        num_decode_tokens = attn_metadata.num_decode_tokens
+        descale_shape = (attn_metadata.query_start_loc.shape[0] - 1,
+                         key.shape[1])
+        if prefill_meta := attn_metadata.prefill_metadata:
+            unified_attention(
+                q=query[num_decode_tokens:num_actual_tokens],
+                k=key_cache,
+                v=value_cache,
+                out=output[num_decode_tokens:num_actual_tokens],
+                cu_seqlens_q=prefill_meta.query_start_loc,
+                max_seqlen_q=prefill_meta.max_query_len,
+                seqused_k=prefill_meta.seq_lens,
+                max_seqlen_k=prefill_meta.max_seq_len,
+                softmax_scale=self.scale,
+                causal=True,
+                alibi_slopes=self.alibi_slopes,
+                window_size=self.sliding_window,
+                block_table=prefill_meta.block_table,
+                softcap=self.logits_soft_cap,
+                q_descale=None,  # Not supported
+                k_descale=layer._k_scale.expand(descale_shape),
+                v_descale=layer._v_scale.expand(descale_shape),
+            )
+
+        if decode_meta := attn_metadata.decode_metadata:
+            unified_attention(
+                q=query[:num_decode_tokens],
+                k=key_cache,
+                v=value_cache,
+                out=output[:num_decode_tokens],
+                cu_seqlens_q=decode_meta.query_start_loc,
+                max_seqlen_q=decode_meta.max_query_len,
+                seqused_k=decode_meta.seq_lens,
+                max_seqlen_k=decode_meta.max_seq_len,
+                softmax_scale=self.scale,
+                causal=True,
+                alibi_slopes=self.alibi_slopes,
+                qq_bias=decode_meta.tree_attn_bias,
+                window_size=self.sliding_window,
+                block_table=decode_meta.block_table,
+                softcap=self.logits_soft_cap,
+                q_descale=None,  # Not supported
+                k_descale=layer._k_scale.expand(descale_shape),
+                v_descale=layer._v_scale.expand(descale_shape),
+            )
+        return output
diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index 48bd632227c5b..7aeea40b25a67 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -214,6 +214,26 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
         return self.build(common_prefix_len=0,
                           common_attn_metadata=common_attn_metadata)
 
+    def build_for_drafting(
+        self,
+        common_attn_metadata: CommonAttentionMetadata,
+        draft_index: int,
+    ) -> M:
+        """
+        Build attention metadata for the draft model.
+
+        The default implementation ignores ``draft_index`` and calls
+        ``build`` with ``common_prefix_len=0`` and ``fast_build=True``.
+
+        Args:
+            common_attn_metadata: The common attention metadata.
+            draft_index: Index of the current draft operation: the i-th token
+                when drafting a chain, the i-th tree level for tree attention.
+        """
+        return self.build(common_prefix_len=0,
+                          common_attn_metadata=common_attn_metadata,
+                          fast_build=True)
+
     def use_cascade_attention(
         self,
         common_prefix_len: int,
diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 302126dbe3d5f..b2380bb3dd5ab 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import ast
+from dataclasses import replace
 from typing import Optional
 
 import numpy as np
@@ -17,6 +19,8 @@ from vllm.model_executor.models import supports_multimodal
 from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
 from vllm.utils import is_pin_memory_available
 from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
+from vllm.v1.attention.backends.tree_attn import (TreeAttentionMetadata,
+                                                  TreeAttentionMetadataBuilder)
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
 from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.sample.metadata import SamplingMetadata
@@ -74,18 +78,52 @@ class EagleProposer:
             (self.max_num_tokens, self.hidden_size),
             dtype=self.dtype,
             device=device)
-        # We need +1 here because the arange is used to set query_start_loc,
-        # which has one more element than batch_size.
-        self.arange = torch.arange(vllm_config.scheduler_config.max_num_seqs +
-                                   1,
-                                   device=device,
-                                   dtype=torch.int32)
+
+        max_batch_size = vllm_config.scheduler_config.max_num_seqs
+        self.arange = torch.arange(
+            # We need +1 here because the arange is used to set query_start_loc,
+            # which has one more element than batch_size.
+            max_batch_size + 1,
+            device=device,
+            dtype=torch.int32,
+        )
 
         self.inputs_embeds = torch.zeros(
             (self.max_num_tokens, self.hidden_size),
             dtype=self.dtype,
             device=device)
 
+        # Parse the speculative token tree.
+        spec_token_tree = self.speculative_config.speculative_token_tree
+        self.tree_choices: list[tuple[int,
+                                      ...]] = ast.literal_eval(spec_token_tree)
+        tree_depth = len(self.tree_choices[-1])
+        # Precompute per-level properties of the tree.
+        num_drafts_per_level = [0] * tree_depth
+        for node in self.tree_choices:
+            num_drafts_per_level[len(node) - 1] += 1
+        self.cu_drafts_per_level = [num_drafts_per_level[0]]
+        self.child_drafts_per_level = [num_drafts_per_level[0]]
+        for level in range(1, tree_depth):
+            self.cu_drafts_per_level.append(self.cu_drafts_per_level[-1] +
+                                            num_drafts_per_level[level])
+            self.child_drafts_per_level.append(num_drafts_per_level[level] //
+                                               num_drafts_per_level[level - 1])
+        # Find the first level where the tree branches off into one or more
+        # children.
+        self.first_branching_level = None
+        for level in range(tree_depth):
+            if self.cu_drafts_per_level[level] > level + 1:
+                self.first_branching_level = level
+                break
+        # Precompute draft position offsets in flattened tree.
+        self.tree_draft_pos_offsets = torch.arange(
+            1,
+            len(self.tree_choices) + 1,
+            device=device,
+            dtype=torch.int32,
+        ).repeat(max_batch_size, 1)
+
     def propose(
         self,
         # [num_tokens]
@@ -120,11 +158,9 @@ class EagleProposer:
         assert self.runner is not None
 
         # FIXME: need to consider multiple kv_cache_groups
-        attn_metadata = self.runner.attn_metadata_builders[0].build(
-            common_prefix_len=0,
-            common_attn_metadata=common_attn_metadata,
-            fast_build=True,
-        )
+        attn_metadata = self.runner.attn_metadata_builders[
+            0].build_for_drafting(common_attn_metadata=common_attn_metadata,
+                                  draft_index=0)
 
         # At this moment, we assume all eagle layers belong to the same KV
         # cache group, thus using the same attention metadata.
@@ -167,6 +203,22 @@ class EagleProposer:
                 last_hidden_states, hidden_states = ret_hidden_states
         sample_hidden_states = last_hidden_states[last_token_indices]
         logits = self.model.compute_logits(sample_hidden_states, None)
+        positions = target_positions[last_token_indices]
+        hidden_states = hidden_states[last_token_indices]
+        if self.first_branching_level == 0:
+            # Branching has occurred at the root level. Draft using tree
+            # attention.
+            draft_token_ids_list = self.propose_tree(
+                tree_root_level=0,
+                batch_size=batch_size,
+                logits=logits,
+                positions=positions,
+                hidden_states=hidden_states,
+                common_attn_metadata=common_attn_metadata,
+            )
+            # [batch_size, num_tree_tokens]
+            return torch.cat(draft_token_ids_list, dim=1)
+
         draft_token_ids = logits.argmax(dim=-1)
 
         # Early exit if there is only one draft token to be generated.
@@ -178,16 +230,15 @@ class EagleProposer:
         # one layer. Adapt this code to support multiple layers once
         # there's a multi-layer MTP module.
 
-        # Currently FlashAttention is the only backend that supports
-        # multi-token eagle spec decode. This is because the code below
+        # Currently, only FlashAttention and TreeAttention support multi-token
+        # eagle spec decode. This is because the code below
         # makes assumptions about attn_metadata attributes available.
-        assert isinstance(attn_metadata, FlashAttentionMetadata)
+        assert isinstance(attn_metadata,
+                          (FlashAttentionMetadata, TreeAttentionMetadata))
 
         # Generate the remaining draft tokens.
         draft_token_ids_list = [draft_token_ids]
 
-        positions = target_positions[last_token_indices]
-        hidden_states = hidden_states[last_token_indices]
         if self.use_cuda_graph and \
             batch_size <= self.cudagraph_batch_sizes[-1]:
             input_batch_size = self.vllm_config.pad_for_cudagraph(batch_size)
@@ -196,7 +247,7 @@ class EagleProposer:
         attn_metadata.num_actual_tokens = batch_size
         attn_metadata.max_query_len = 1
         attn_metadata.query_start_loc = self.arange[:batch_size + 1]
-        for _ in range(self.num_speculative_tokens - 1):
+        for token_index in range(self.num_speculative_tokens - 1):
             # Update the inputs.
             # cast to int32 is crucial when eagle model is compiled.
             # tensor.argmax() returns int64 by default.
@@ -265,7 +316,20 @@ class EagleProposer:
             logits = self.model.compute_logits(last_hidden_states[:batch_size],
                                                None)
 
-            # TODO(wenlong): get more than one token for tree attention
+            if self.first_branching_level == token_index + 1:
+                # Branching has occurred. The remaining tokens are drafted
+                # using tree attention.
+                draft_token_ids_list += self.propose_tree(
+                    tree_root_level=token_index + 1,
+                    batch_size=batch_size,
+                    logits=logits,
+                    positions=positions,
+                    hidden_states=hidden_states,
+                    common_attn_metadata=common_attn_metadata,
+                )
+                # [batch_size, num_tree_tokens]
+                return torch.cat(draft_token_ids_list, dim=1)
+
             draft_token_ids = logits.argmax(dim=-1)
             draft_token_ids_list.append(draft_token_ids)
 
@@ -273,6 +337,175 @@ class EagleProposer:
         draft_token_ids = torch.stack(draft_token_ids_list, dim=1)
         return draft_token_ids
 
+    def propose_tree(
+        self,
+        tree_root_level: int,
+        batch_size: int,
+        # [num_tokens, vocab_size]
+        logits: torch.Tensor,
+        # [num_tokens]
+        positions: torch.Tensor,
+        # [num_tokens, hidden_size]
+        hidden_states: torch.Tensor,
+        common_attn_metadata: CommonAttentionMetadata,
+    ) -> list[torch.Tensor]:
+        tree_attn_metadata_builder = self.runner.attn_metadata_builders[0]
+        assert isinstance(tree_attn_metadata_builder,
+                          TreeAttentionMetadataBuilder)
+
+        total_num_drafts = self.cu_drafts_per_level[tree_root_level]
+        level_num_drafts = total_num_drafts
+        # Sample a draft token for each child at the tree root level.
+        num_children = self.child_drafts_per_level[tree_root_level]
+        if num_children == 1:
+            draft_token_ids = logits.argmax(dim=-1).view(batch_size, -1)
+        else:
+            draft_token_ids = torch.topk(logits, num_children,
+                                         dim=-1).indices.view(batch_size, -1)
+        draft_token_ids_list = [draft_token_ids]
+        draft_hidden_states = hidden_states.view(batch_size, 1, -1)
+
+        # Initialize empty tensors for concatenation with the level outputs.
+        tree_input_ids = torch.empty(0,
+                                     device=self.input_ids.device,
+                                     dtype=self.input_ids.dtype)
+        tree_positions = torch.empty(0,
+                                     device=self.positions.device,
+                                     dtype=self.positions.dtype)
+        tree_hidden_states = torch.empty(0,
+                                         device=self.hidden_states.device,
+                                         dtype=self.hidden_states.dtype)
+        # Precompute the draft token positions.
+        flattened_draft_positions = (
+            positions.view(batch_size, -1) +
+            self.tree_draft_pos_offsets[:batch_size, :])
+        tree_depth = len(self.cu_drafts_per_level)
+        for level in range(tree_root_level, tree_depth - 1):
+            # Get draft positions for RoPE.
+            draft_positions = positions + (level + 1)
+            exceeds_max_model_len = (positions +
+                                     total_num_drafts) >= self.max_model_len
+            # Mask out the position ids that exceed the max model length.
+            # Otherwise, we may get out-of-range error in RoPE.
+            clamped_draft_positions = torch.where(
+                exceeds_max_model_len,
+                0,
+                draft_positions,
+            )
+            if level_num_drafts > 1:
+                # Repeat the positions for each draft at this level.
+                draft_positions = clamped_draft_positions.repeat_interleave(
+                    level_num_drafts).reshape(batch_size, -1)
+
+            if num_children > 1:
+                # Repeat draft hidden states for each child.
+                draft_hidden_states = draft_hidden_states.repeat_interleave(
+                    num_children, dim=1)
+
+            # Concatenate the draft tokens, positions, and hidden states.
+            tree_input_ids = torch.cat([tree_input_ids, draft_token_ids],
+                                       dim=1)
+            tree_positions = torch.cat([tree_positions, draft_positions],
+                                       dim=1)
+            tree_hidden_states = torch.cat(
+                [tree_hidden_states, draft_hidden_states], dim=1)
+
+            # Build new attention metadata for the next level of drafts.
+            # This is necessary to support tree attention.
+            query_len = total_num_drafts - tree_root_level
+            common_attn_metadata = replace(
+                common_attn_metadata,
+                query_start_loc=query_len * self.arange[:batch_size + 1],
+                seq_lens=common_attn_metadata.seq_lens + level_num_drafts,
+                num_actual_tokens=batch_size * query_len,
+                max_query_len=query_len,
+            )
+            attn_metadata = tree_attn_metadata_builder.build_for_drafting(
+                common_attn_metadata=common_attn_metadata,
+                draft_index=tree_root_level + 1,
+            )
+
+            # Apply new attention metadata to all layers.
+            per_layer_attn_metadata = {}
+            for layer_name in self.attn_layer_names:
+                per_layer_attn_metadata[layer_name] = attn_metadata
+
+            # Consider max model length.
+            attn_metadata.max_seq_len = min(attn_metadata.max_seq_len,
+                                            self.max_model_len)
+            # For the requests that exceed the max model length, we set the
+            # sequence length to 1 to minimize their overheads in attention.
+            attn_metadata.seq_lens.masked_fill_(exceeds_max_model_len, 1)
+
+            # Compute the slot mapping.
+            query_positions = flattened_draft_positions[:, level:level +
+                                                        query_len]
+            block_numbers = query_positions // self.block_size
+            block_ids = attn_metadata.block_table.gather(dim=1,
+                                                         index=block_numbers)
+            slot_mapping = (block_ids * self.block_size +
+                            query_positions % self.block_size)
+            # Mask out the slot mappings that exceed the max model length.
+            # Otherwise, the KV cache will be inadvertently updated with the
+            # padding tokens.
+            slot_mapping[exceeds_max_model_len] = PADDING_SLOT_ID
+            attn_metadata.slot_mapping = slot_mapping.view(-1)
+
+            # Copy inputs to buffer for cudagraph.
+            num_tokens = attn_metadata.num_actual_tokens
+            input_ids = tree_input_ids.view(-1)
+            self.input_ids[:num_tokens] = input_ids
+            self.positions[:num_tokens] = tree_positions.view(-1)
+            self.hidden_states[:num_tokens] = tree_hidden_states.view(
+                num_tokens, -1)
+
+            if self.use_cuda_graph and \
+                num_tokens <= self.cudagraph_batch_sizes[-1]:
+                num_input_tokens = self.vllm_config.pad_for_cudagraph(
+                    num_tokens)
+            else:
+                num_input_tokens = num_tokens
+            # Run the model.
+            with set_forward_context(per_layer_attn_metadata,
+                                     self.vllm_config,
+                                     num_tokens=num_input_tokens):
+                last_hidden_states, hidden_states = self.model(
+                    input_ids=self.input_ids[:num_input_tokens],
+                    positions=self.positions[:num_input_tokens],
+                    hidden_states=self.hidden_states[:num_input_tokens],
+                    inputs_embeds=None,
+                )
+
+            # Get the output hidden states for the draft tokens.
+            draft_hidden_states = hidden_states[:num_tokens].view(
+                batch_size, query_len, -1)[:, -level_num_drafts:]
+            draft_last_hidden_states = last_hidden_states[:num_tokens].view(
+                batch_size, query_len, -1)[:, -level_num_drafts:]
+
+            # Get the output logits for the draft tokens.
+            logits = self.model.compute_logits(
+                draft_last_hidden_states.reshape(batch_size * level_num_drafts,
+                                                 -1),
+                None,
+            )
+
+            # Sample a draft token for each child at the next tree level.
+            num_children = self.child_drafts_per_level[level + 1]
+            if num_children == 1:
+                draft_token_ids = logits.argmax(dim=-1).view(batch_size, -1)
+            else:
+                draft_token_ids = torch.topk(logits, num_children,
+                                             dim=-1).indices.view(
+                                                 batch_size, -1)
+            draft_token_ids_list.append(draft_token_ids)
+
+            # Update the # drafts counters for the next tree level.
+            level_num_drafts = self.cu_drafts_per_level[level +
+                                                        1] - total_num_drafts
+            total_num_drafts = self.cu_drafts_per_level[level + 1]
+
+        return draft_token_ids_list
+
     def prepare_inputs(
         self,
         common_attn_metadata: CommonAttentionMetadata,

From 49bcd893e753d89a1c2a95a1c2649819309c1e1b Mon Sep 17 00:00:00 2001
From: "ZiTian.Zhao" <zitian.zhao@tencentmusic.com>
Date: Mon, 4 Aug 2025 13:14:49 +0800
Subject: [PATCH 186/224] [refactor] improve ConstantList exception specificity
 (#22156)

Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com>
---
 vllm/v1/utils.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py
index d0175695c1d0f..b5750c82db023 100644
--- a/vllm/v1/utils.py
+++ b/vllm/v1/utils.py
@@ -34,22 +34,22 @@ class ConstantList(Generic[T], Sequence):
         self._x = x
 
     def append(self, item):
-        raise Exception("Cannot append to a constant list")
+        raise TypeError("Cannot append to a constant list")
 
     def extend(self, item):
-        raise Exception("Cannot extend a constant list")
+        raise TypeError("Cannot extend a constant list")
 
     def insert(self, item):
-        raise Exception("Cannot insert into a constant list")
+        raise TypeError("Cannot insert into a constant list")
 
     def pop(self, item):
-        raise Exception("Cannot pop from a constant list")
+        raise TypeError("Cannot pop from a constant list")
 
     def remove(self, item):
-        raise Exception("Cannot remove from a constant list")
+        raise TypeError("Cannot remove from a constant list")
 
     def clear(self):
-        raise Exception("Cannot clear a constant list")
+        raise TypeError("Cannot clear a constant list")
 
     def index(self,
               item: T,
@@ -78,10 +78,10 @@ class ConstantList(Generic[T], Sequence):
         ...
 
     def __setitem__(self, item: Union[int, slice], value: Union[T, list[T]]):
-        raise Exception("Cannot set item in a constant list")
+        raise TypeError("Cannot set item in a constant list")
 
     def __delitem__(self, item):
-        raise Exception("Cannot delete item from a constant list")
+        raise TypeError("Cannot delete item from a constant list")
 
     def __iter__(self):
         return iter(self._x)

From e5949e5ae013692ba09cc52472cf441675f5a270 Mon Sep 17 00:00:00 2001
From: Chenxi Yang <cxyang@cs.utexas.edu>
Date: Sun, 3 Aug 2025 22:15:14 -0700
Subject: [PATCH 187/224] Remove index_put from MM embeddings merging (#22105)

Co-authored-by: Chenxi Yang <cxyang@meta.com>
---
 vllm/model_executor/models/utils.py | 42 ++++++++++++++++-------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py
index 62deb68035b92..28508e1bac1ee 100644
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -393,7 +393,7 @@ def merge_multimodal_embeddings_from_map(
         inputs_embeds: torch.Tensor, multimodal_embeddings: NestedTensors,
         placeholder_map: MultiModalPlaceholderMap.IndexMap) -> torch.Tensor:
     """
-    Merge ``multimodal_embeddings`` into ``inputs_embeds`` using the provided 
+    Merge ``multimodal_embeddings`` into ``inputs_embeds`` using the provided
     placeholder map .
 
     Note:
@@ -418,17 +418,23 @@ def _merge_multimodal_embeddings(
     Note:
         This updates ``inputs_embeds`` in place.
     """
-    num_expected_tokens = is_multimodal.sum().item()
-    assert isinstance(num_expected_tokens, int)
-
     flattened = _flatten_embeddings(multimodal_embeddings)
-    if flattened.shape[0] != num_expected_tokens:
-        expr = _embedding_count_expression(multimodal_embeddings)
-        raise ValueError(
-            f"Attempted to assign {expr} = {flattened.shape[0]} "
-            f"multimodal tokens to {num_expected_tokens} placeholders")
+    try:
+        # This is equivalent to: inputs_embeds[is_multimodal] = flattened.
+        inputs_embeds.masked_scatter_(is_multimodal.unsqueeze(-1), flattened)
+    except RuntimeError as e:
+        num_expected_tokens = is_multimodal.sum().item()
+        assert isinstance(num_expected_tokens, int)
+
+        if flattened.shape[0] != num_expected_tokens:
+            expr = _embedding_count_expression(multimodal_embeddings)
+            raise ValueError(
+                f"Attempted to assign {expr} = {flattened.shape[0]} "
+                f"multimodal tokens to {num_expected_tokens} placeholders"
+            ) from e
+        else:
+            raise ValueError("Error during masked scatter operation") from e
 
-    inputs_embeds[is_multimodal] = flattened
     return inputs_embeds
 
 
@@ -478,11 +484,11 @@ def merge_multimodal_embeddings(
     Merge ``multimodal_embeddings`` into ``inputs_embeds`` by overwriting the
     positions in ``inputs_embeds`` corresponding to placeholder tokens in
     ``input_ids``.
-    
-    ``placeholder_token_id`` can be a list of token ids (e.g, token ids 
-    of img_start, img_break, and img_end tokens) when needed: This means 
-    the order of these tokens in the ``input_ids`` MUST MATCH the order of 
-    their embeddings in ``multimodal_embeddings`` since we need to 
+
+    ``placeholder_token_id`` can be a list of token ids (e.g, token ids
+    of img_start, img_break, and img_end tokens) when needed: This means
+    the order of these tokens in the ``input_ids`` MUST MATCH the order of
+    their embeddings in ``multimodal_embeddings`` since we need to
     slice-merge instead of individually scattering.
 
     For example, if input_ids is "TTTTTSIIIBIIIBIIIETTT", where
@@ -491,9 +497,9 @@ def merge_multimodal_embeddings(
     - I is image embedding token
     - B is image break token
     - E is image end token.
-    
-    Then the image embeddings (that correspond to I's) from vision encoder 
-    must be padded with embeddings of S, B, and E in the same order of 
+
+    Then the image embeddings (that correspond to I's) from vision encoder
+    must be padded with embeddings of S, B, and E in the same order of
     input_ids for a correct embedding merge.
 
     Note:

From 8ecb3e9e9336ce47e47b61417e24161b38079e93 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Mon, 4 Aug 2025 01:19:04 -0400
Subject: [PATCH 188/224] [CI Bugfix] Fix wNa16 kernel not found for
 test_shared_storage_connector_hashes (#22163)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
---
 tests/v1/kv_connector/unit/test_shared_storage_connector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/v1/kv_connector/unit/test_shared_storage_connector.py b/tests/v1/kv_connector/unit/test_shared_storage_connector.py
index 11b7e378441a4..db203b81f15fc 100644
--- a/tests/v1/kv_connector/unit/test_shared_storage_connector.py
+++ b/tests/v1/kv_connector/unit/test_shared_storage_connector.py
@@ -10,7 +10,7 @@ from vllm.assets.image import ImageAsset
 from vllm.config import KVTransferConfig
 from vllm.multimodal.utils import encode_image_base64
 
-MODEL_NAME = "RedHatAI/Qwen2.5-VL-3B-Instruct-quantized.w4a16"
+MODEL_NAME = "RedHatAI/Qwen2.5-VL-3B-Instruct-quantized.w8a8"
 
 SAMPLING_PARAMS = SamplingParams(temperature=0.0, top_k=1, max_tokens=128)
 

From a7b8788d2c2fae6bf52c128916de19e85f2b0a25 Mon Sep 17 00:00:00 2001
From: Jee Jee Li <pandaleefree@gmail.com>
Date: Mon, 4 Aug 2025 14:51:20 +0800
Subject: [PATCH 189/224] [Misc] Modify the organization of GLM series 
 (#22171)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
---
 docs/models/supported_models.md                    | 10 +++++-----
 examples/offline_inference/vision_language.py      |  4 ++--
 tests/distributed/test_pipeline_parallel.py        |  4 ++--
 tests/lora/test_add_lora.py                        |  2 +-
 tests/lora/test_chatglm3_tp.py                     |  2 +-
 tests/models/language/generation/test_common.py    |  2 +-
 tests/models/multimodal/generation/test_common.py  |  6 +++---
 tests/models/multimodal/processing/test_common.py  |  4 ++--
 tests/models/multimodal/processing/test_glm4_1v.py |  2 +-
 tests/models/registry.py                           | 10 +++++-----
 tests/tokenization/test_cached_tokenizer.py        |  2 +-
 vllm/model_executor/models/chatglm.py              |  6 +++---
 vllm/model_executor/models/glm4v.py                |  2 +-
 vllm/test_utils.py                                 |  2 +-
 vllm/transformers_utils/configs/chatglm.py         |  2 +-
 vllm/transformers_utils/tokenizer.py               |  2 +-
 16 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index c058c20f1ed73..cd1228836b870 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -328,7 +328,7 @@ th {
 | `BambaForCausalLM` | Bamba | `ibm-ai-platform/Bamba-9B-fp8`, `ibm-ai-platform/Bamba-9B` | ✅︎ | ✅︎ | ✅︎ |
 | `BloomForCausalLM` | BLOOM, BLOOMZ, BLOOMChat | `bigscience/bloom`, `bigscience/bloomz`, etc. | | ✅︎ | |
 | `BartForConditionalGeneration` | BART | `facebook/bart-base`, `facebook/bart-large-cnn`, etc. | | | |
-| `ChatGLMModel`, `ChatGLMForConditionalGeneration` | ChatGLM | `THUDM/chatglm2-6b`, `THUDM/chatglm3-6b`, `ShieldLM-6B-chatglm3`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `ChatGLMModel`, `ChatGLMForConditionalGeneration` | ChatGLM | `zai-org/chatglm2-6b`, `zai-org/chatglm3-6b`, `ShieldLM-6B-chatglm3`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `CohereForCausalLM`, `Cohere2ForCausalLM` | Command-R | `CohereForAI/c4ai-command-r-v01`, `CohereForAI/c4ai-command-r7b-12-2024`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `DbrxForCausalLM` | DBRX | `databricks/dbrx-base`, `databricks/dbrx-instruct`, etc. | | ✅︎ | ✅︎ |
 | `DeciLMForCausalLM` | DeciLM | `nvidia/Llama-3_3-Nemotron-Super-49B-v1`, etc. | ✅︎ | ✅︎ | ✅︎ |
@@ -348,8 +348,8 @@ th {
 | `Gemma2ForCausalLM` | Gemma 2 | `google/gemma-2-9b`, `google/gemma-2-27b`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Gemma3ForCausalLM` | Gemma 3 | `google/gemma-3-1b-it`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Gemma3nForConditionalGeneration` | Gemma 3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | | ✅︎ |
-| `GlmForCausalLM` | GLM-4 | `THUDM/glm-4-9b-chat-hf`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `Glm4ForCausalLM` | GLM-4-0414 | `THUDM/GLM-4-32B-0414`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `GlmForCausalLM` | GLM-4 | `zai-org/glm-4-9b-chat-hf`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4ForCausalLM` | GLM-4-0414 | `zai-org/GLM-4-32B-0414`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `GPT2LMHeadModel` | GPT-2 | `gpt2`, `gpt2-xl`, etc. | | ✅︎ | ✅︎ |
 | `GPTBigCodeForCausalLM` | StarCoder, SantaCoder, WizardCoder | `bigcode/starcoder`, `bigcode/gpt_bigcode-santacoder`, `WizardLM/WizardCoder-15B-V1.0`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `GPTJForCausalLM` | GPT-J | `EleutherAI/gpt-j-6b`, `nomic-ai/gpt4all-j`, etc. | | ✅︎ | ✅︎ |
@@ -589,8 +589,8 @@ See [this page](generative_models.md) for more information on how to use generat
 | `Florence2ForConditionalGeneration` | Florence-2 | T + I | `microsoft/Florence-2-base`, `microsoft/Florence-2-large`, etc. | | | |
 | `FuyuForCausalLM` | Fuyu | T + I | `adept/fuyu-8b`, etc. | | ✅︎ | ✅︎ |
 | `Gemma3ForConditionalGeneration` | Gemma 3 | T + I<sup>+</sup> | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ | ⚠️ |
-| `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `zai-org/glm-4v-9b`, `zai-org/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Glm4v_moeForConditionalGeneration` | GLM-4.5V | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5V-Air`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ |
diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
index a75b8e2b047d8..16bb3712f551e 100644
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -221,7 +221,7 @@ def run_gemma3(questions: list[str], modality: str) -> ModelRequestData:
 # GLM-4v
 def run_glm4v(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
-    model_name = "THUDM/glm-4v-9b"
+    model_name = "zai-org/glm-4v-9b"
 
     engine_args = EngineArgs(
         model=model_name,
@@ -250,7 +250,7 @@ def run_glm4v(questions: list[str], modality: str) -> ModelRequestData:
 
 # GLM-4.1V
 def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData:
-    model_name = "THUDM/GLM-4.1V-9B-Thinking"
+    model_name = "zai-org/GLM-4.1V-9B-Thinking"
 
     engine_args = EngineArgs(
         model=model_name,
diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index cfb2e2dd15f4d..12dd7c4222630 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -154,7 +154,7 @@ TEXT_GENERATION_MODELS = {
     "baichuan-inc/Baichuan-7B": PPTestSettings.fast(),
     "baichuan-inc/Baichuan2-13B-Chat": PPTestSettings.fast(),
     "bigscience/bloomz-1b1": PPTestSettings.fast(),
-    "THUDM/chatglm3-6b": PPTestSettings.fast(),
+    "zai-org/chatglm3-6b": PPTestSettings.fast(),
     "CohereForAI/c4ai-command-r-v01": PPTestSettings.fast(load_format="dummy"),
     "databricks/dbrx-instruct": PPTestSettings.fast(load_format="dummy"),
     "Deci/DeciLM-7B-instruct": PPTestSettings.fast(),
@@ -224,7 +224,7 @@ MULTIMODAL_MODELS = {
     "Salesforce/blip2-opt-6.7b": PPTestSettings.fast(),
     "facebook/chameleon-7b": PPTestSettings.fast(),
     "adept/fuyu-8b": PPTestSettings.fast(),
-    "THUDM/glm-4v-9b": PPTestSettings.fast(),
+    "zai-org/glm-4v-9b": PPTestSettings.fast(),
     "OpenGVLab/InternVL2-1B": PPTestSettings.fast(),
     "llava-hf/llava-1.5-7b-hf": PPTestSettings.fast(),
     "llava-hf/llava-v1.6-mistral-7b-hf": PPTestSettings.fast(),
diff --git a/tests/lora/test_add_lora.py b/tests/lora/test_add_lora.py
index cc8160b2860d9..d7b019509fa3e 100644
--- a/tests/lora/test_add_lora.py
+++ b/tests/lora/test_add_lora.py
@@ -14,7 +14,7 @@ from vllm.lora.request import LoRARequest
 from vllm.sampling_params import SamplingParams
 from vllm.utils import merge_async_iterators
 
-MODEL_PATH = "THUDM/chatglm3-6b"
+MODEL_PATH = "zai-org/chatglm3-6b"
 LORA_RANK = 64
 DEFAULT_MAX_LORAS = 4 * 3
 
diff --git a/tests/lora/test_chatglm3_tp.py b/tests/lora/test_chatglm3_tp.py
index 5481b413b8f5f..fb00e7b65b04a 100644
--- a/tests/lora/test_chatglm3_tp.py
+++ b/tests/lora/test_chatglm3_tp.py
@@ -6,7 +6,7 @@ from vllm.lora.request import LoRARequest
 
 from ..utils import create_new_process_for_each_test, multi_gpu_test
 
-MODEL_PATH = "THUDM/chatglm3-6b"
+MODEL_PATH = "zai-org/chatglm3-6b"
 
 PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:"""  # noqa: E501
 
diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py
index ea240d2278895..57382914bfea8 100644
--- a/tests/models/language/generation/test_common.py
+++ b/tests/models/language/generation/test_common.py
@@ -53,7 +53,7 @@ AITER_MODEL_LIST = [
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "THUDM/chatglm3-6b",  # chatglm (text-only)
+            "zai-org/chatglm3-6b",  # chatglm (text-only)
         ),
         pytest.param(
             "meta-llama/Llama-3.2-1B-Instruct",  # llama
diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index 967228b54a0af..8cb826c1144d2 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -355,7 +355,7 @@ VLM_TEST_SETTINGS = {
         num_logprobs=10,
     ),
     "glm4v": VLMTestInfo(
-        models=["THUDM/glm-4v-9b"],
+        models=["zai-org/glm-4v-9b"],
         test_type=VLMTestType.IMAGE,
         prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
         single_image_prompts=IMAGE_ASSETS.prompts({
@@ -374,7 +374,7 @@ VLM_TEST_SETTINGS = {
         marks=[large_gpu_mark(min_gb=32)],
     ),
     "glm4_1v": VLMTestInfo(
-        models=["THUDM/GLM-4.1V-9B-Thinking"],
+        models=["zai-org/GLM-4.1V-9B-Thinking"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
         prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>", # noqa: E501
@@ -388,7 +388,7 @@ VLM_TEST_SETTINGS = {
         marks=[large_gpu_mark(min_gb=32)],
     ),
     "glm4_1v-video": VLMTestInfo(
-        models=["THUDM/GLM-4.1V-9B-Thinking"],
+        models=["zai-org/GLM-4.1V-9B-Thinking"],
         # GLM4.1V require include video metadata for input
         test_type=VLMTestType.CUSTOM_INPUTS,
         max_model_len=4096,
diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py
index f70e03d0f6691..bd1c55d95dac2 100644
--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@@ -271,8 +271,8 @@ def _test_processing_correctness_one(
     "microsoft/Florence-2-base",
     "adept/fuyu-8b",
     "google/gemma-3-4b-it",
-    "THUDM/glm-4v-9b",
-    "THUDM/GLM-4.1V-9B-Thinking",
+    "zai-org/glm-4v-9b",
+    "zai-org/GLM-4.1V-9B-Thinking",
     "ibm-granite/granite-speech-3.3-2b",
     "h2oai/h2ovl-mississippi-800m",
     "internlm/Intern-S1",
diff --git a/tests/models/multimodal/processing/test_glm4_1v.py b/tests/models/multimodal/processing/test_glm4_1v.py
index d1c5fa8fec6d2..a6d900ec5d895 100644
--- a/tests/models/multimodal/processing/test_glm4_1v.py
+++ b/tests/models/multimodal/processing/test_glm4_1v.py
@@ -9,7 +9,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
 from ...utils import build_model_context
 
 
-@pytest.mark.parametrize("model_id", ["THUDM/GLM-4.1V-9B-Thinking"])
+@pytest.mark.parametrize("model_id", ["zai-org/GLM-4.1V-9B-Thinking"])
 @pytest.mark.parametrize("expected_toks_per_frame", [299])
 @pytest.mark.parametrize("num_frames", [32, 128])
 @pytest.mark.parametrize("fps, expected_grid_t", [(1, 5), (2, 10)])
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 25cfa267d1815..ffa6b755adf43 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -153,7 +153,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                         extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}),  # noqa: E501
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
                                         {"1b": "bigscience/bloomz-1b1"}),
-    "ChatGLMModel": _HfExamplesInfo("THUDM/chatglm3-6b",
+    "ChatGLMModel": _HfExamplesInfo("zai-org/chatglm3-6b",
                                     trust_remote_code=True,
                                     max_transformers_version="4.48"),
     "ChatGLMForConditionalGeneration": _HfExamplesInfo("thu-coai/ShieldLM-6B-chatglm3",  # noqa: E501
@@ -187,8 +187,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
     "Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it"),
     "Gemma3nForConditionalGeneration": _HfExamplesInfo("google/gemma-3n-E2B-it",    # noqa: E501
                                           min_transformers_version="4.53"),
-    "GlmForCausalLM": _HfExamplesInfo("THUDM/glm-4-9b-chat-hf"),
-    "Glm4ForCausalLM": _HfExamplesInfo("THUDM/GLM-4-9B-0414"),
+    "GlmForCausalLM": _HfExamplesInfo("zai-org/glm-4-9b-chat-hf"),
+    "Glm4ForCausalLM": _HfExamplesInfo("zai-org/GLM-4-9B-0414"),
     "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5",
                                           min_transformers_version="4.54"),   # noqa: E501
     "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2",
@@ -380,10 +380,10 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
     "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"),
     "GraniteSpeechForConditionalGeneration": _HfExamplesInfo("ibm-granite/granite-speech-3.3-2b"),  # noqa: E501
-    "GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b",
+    "GLM4VForCausalLM": _HfExamplesInfo("zai-org/glm-4v-9b",
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
-    "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking"),  # noqa: E501
+    "Glm4vForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.1V-9B-Thinking"),  # noqa: E501
     "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
                                           is_available_online=False),   # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
diff --git a/tests/tokenization/test_cached_tokenizer.py b/tests/tokenization/test_cached_tokenizer.py
index e218678c4363b..07217611ea4d2 100644
--- a/tests/tokenization/test_cached_tokenizer.py
+++ b/tests/tokenization/test_cached_tokenizer.py
@@ -10,7 +10,7 @@ from vllm.transformers_utils.tokenizer import (AnyTokenizer,
                                                get_cached_tokenizer)
 
 
-@pytest.mark.parametrize("model_id", ["gpt2", "THUDM/chatglm3-6b"])
+@pytest.mark.parametrize("model_id", ["gpt2", "zai-org/chatglm3-6b"])
 def test_cached_tokenizer(model_id: str):
     reference_tokenizer = AutoTokenizer.from_pretrained(model_id,
                                                         trust_remote_code=True)
diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py
index 129f0942f14ef..5470ff3e8b612 100644
--- a/vllm/model_executor/models/chatglm.py
+++ b/vllm/model_executor/models/chatglm.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # Adapted from
-# https://github.com/THUDM/ChatGLM2-6B
+# https://github.com/zai-org/ChatGLM2-6B
 """Inference-only ChatGLM model compatible with THUDM weights."""
 import json
 from collections.abc import Iterable
@@ -86,10 +86,10 @@ class GLMAttention(nn.Module):
             prefix=f"{prefix}.dense",
         )
 
-        # https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
+        # https://huggingface.co/zai-org/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
         rope_ratio = getattr(config, "rope_ratio", 1.0)
         max_positions = getattr(config, "seq_length", 8192)
-        # NOTE: THUDM/cogagent-9b-20241220 uses original_rope=False,
+        # NOTE: zai-org/cogagent-9b-20241220 uses original_rope=False,
         # which is equivalent to is_neox_style=True
         is_neox_style = not config.original_rope
         self.rotary_emb = get_rope(
diff --git a/vllm/model_executor/models/glm4v.py b/vllm/model_executor/models/glm4v.py
index 537aeabf72d5a..1751fccd08b06 100644
--- a/vllm/model_executor/models/glm4v.py
+++ b/vllm/model_executor/models/glm4v.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 # Adapted from
-# https://github.com/THUDM/CogAgent
+# https://github.com/zai-org/CogAgent
 """Inference-only CogAgent model compatible with THUDM weights."""
 from argparse import Namespace
 from collections.abc import Mapping, Sequence
diff --git a/vllm/test_utils.py b/vllm/test_utils.py
index 1e61ca6b3deaf..23679b8228d6f 100644
--- a/vllm/test_utils.py
+++ b/vllm/test_utils.py
@@ -118,7 +118,7 @@ MODELS_ON_S3 = [
     "stabilityai/stablelm-zephyr-3b",
     "state-spaces/mamba-130m-hf",
     "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ",
-    "THUDM/glm-4v-9b",
+    "zai-org/glm-4v-9b",
     "TIGER-Lab/Mantis-8B-siglip-llama3",
     "TIGER-Lab/VLM2Vec-Full",
     "tiiuae/falcon-40b",
diff --git a/vllm/transformers_utils/configs/chatglm.py b/vllm/transformers_utils/configs/chatglm.py
index 7c5de3e948ed7..176d2b8f63fe4 100644
--- a/vllm/transformers_utils/configs/chatglm.py
+++ b/vllm/transformers_utils/configs/chatglm.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 # Adapted from
-# https://github.com/THUDM/ChatGLM2-6B
+# https://github.com/zai-org/ChatGLM2-6B
 from transformers import PretrainedConfig
 
 
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 6a31a41980695..d2be2ceeeae6d 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -271,7 +271,7 @@ def get_tokenizer(
             }
             tokenizer.add_special_tokens(special_tokens_map)
 
-        # NOTE: We can remove this after https://github.com/THUDM/ChatGLM3/issues/1324
+        # NOTE: We can remove this after https://github.com/zai-org/ChatGLM3/issues/1324
         if type(tokenizer).__name__ in ("ChatGLMTokenizer",
                                         "ChatGLM4Tokenizer"):
             assert isinstance(tokenizer, PreTrainedTokenizer)

From c1b4eb048a286ea5e7bcca730ae5676625f06541 Mon Sep 17 00:00:00 2001
From: Weixiao Huang <hwx.simle@gmail.com>
Date: Mon, 4 Aug 2025 15:43:06 +0800
Subject: [PATCH 190/224] [feat] move WEIGHT_SCALE_SUPPORTED into raise block
 to accelerate RLHF weight loading (#21164)

Signed-off-by: huangweixiao <huangweixiao@msh.team>
---
 vllm/model_executor/layers/fused_moe/layer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index c2039adad99c3..9e7296feeae1e 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -1079,9 +1079,6 @@ class FusedMoE(torch.nn.Module):
             raise ValueError(f"shard_id must be ['w1','w2','w3'] but "
                              f"got {shard_id}.")
 
-        WEIGHT_SCALE_SUPPORTED = [
-            e.value for e in FusedMoeWeightScaleSupported
-        ]
         # Fetch the dim to shard the parameter/loaded weight
         # based on the shard id. This will be whatever
         # dimension intermediate_size_per_partition is used.
@@ -1230,6 +1227,9 @@ class FusedMoE(torch.nn.Module):
                                                    loaded_weight=loaded_weight,
                                                    expert_id=expert_id)
             else:
+                WEIGHT_SCALE_SUPPORTED = [
+                    e.value for e in FusedMoeWeightScaleSupported
+                ]
                 raise ValueError(
                     f"quant method must be one of {WEIGHT_SCALE_SUPPORTED}")
             return True if return_success else None

From fed5849d3fd7a5e7454cf87f101a18c2bad0436f Mon Sep 17 00:00:00 2001
From: Isotr0py <mozf@mail2.sysu.edu.cn>
Date: Mon, 4 Aug 2025 16:27:02 +0800
Subject: [PATCH 191/224] [Bugfix] Fix failing GGUF models test (#22174)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
---
 vllm/transformers_utils/config.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 0e633c2c0b6ae..cc41a771d06c2 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -290,20 +290,29 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
 
 
 def maybe_override_with_speculators_target_model(
-        model: str,
-        tokenizer: str,
-        trust_remote_code: bool,
-        revision: Optional[str] = None) -> tuple[str, str]:
+    model: str,
+    tokenizer: str,
+    trust_remote_code: bool,
+    revision: Optional[str] = None,
+    **kwargs,
+) -> tuple[str, str]:
     """
     If running a speculators config, override running model with target model
     """
+    is_gguf = check_gguf_file(model)
+    if is_gguf:
+        kwargs["gguf_file"] = Path(model).name
+        gguf_model_repo = Path(model).parent
+    else:
+        gguf_model_repo = None
     config_dict, _ = PretrainedConfig.get_config_dict(
-        model,
+        model if gguf_model_repo is None else gguf_model_repo,
         revision=revision,
         trust_remote_code=trust_remote_code,
         token=_get_hf_token(),
+        **kwargs,
     )
-    spec_config = config_dict.get("speculators_config")
+    spec_config = config_dict.get("speculators_config", None)
     # Return the target model
     if spec_config is not None:
         model = tokenizer = spec_config["verifier"]["name_or_path"]

From 54de71d0dfbb6340fdbc620f4ebeb4236d165a37 Mon Sep 17 00:00:00 2001
From: 22quinn <33176974+22quinn@users.noreply.github.com>
Date: Mon, 4 Aug 2025 03:04:12 -0700
Subject: [PATCH 192/224] [Sampler] Support returning all logprobs or logits
 (#21792)

Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
---
 tests/v1/sample/test_logprobs.py  | 27 +++++++++++++++++++++++++++
 vllm/config.py                    |  7 ++++---
 vllm/sampling_params.py           |  6 ++++--
 vllm/v1/engine/logprobs.py        |  5 +++--
 vllm/v1/engine/processor.py       |  5 ++++-
 vllm/v1/worker/gpu_input_batch.py |  4 +++-
 6 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/tests/v1/sample/test_logprobs.py b/tests/v1/sample/test_logprobs.py
index 680e2ce98bb27..8bd142e87b06e 100644
--- a/tests/v1/sample/test_logprobs.py
+++ b/tests/v1/sample/test_logprobs.py
@@ -429,6 +429,33 @@ def test_zero_logprobs(vllm_model, example_prompts,
             assert len(prompt_token_ids) == len(prompt_logprobs)
 
 
+def test_all_logprobs(example_prompts, monkeypatch: pytest.MonkeyPatch):
+    """Engine should return all vocabulary logprobs
+
+    Args:
+      example_prompts: list of example prompts (test fixture)
+    """
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_V1", "1")
+        runner = VllmRunner(
+            "facebook/opt-125m",
+            max_logprobs=-1,
+            enable_prefix_caching=False,
+            # 2 other llms alive during whole session
+            gpu_memory_utilization=0.15,
+            max_model_len=256)
+        sampling_params_logprobs_all = SamplingParams(max_tokens=5,
+                                                      logprobs=-1)
+        results_logprobs_all = runner.llm.generate(
+            example_prompts, sampling_params=sampling_params_logprobs_all)
+        vocab_size = runner.llm.llm_engine.get_model_config().get_vocab_size()
+        for i in range(len(results_logprobs_all)):
+            logprobs = results_logprobs_all[i].outputs[0].logprobs
+            assert logprobs is not None
+            for logprob in logprobs:
+                assert len(logprob) == vocab_size
+
+
 @pytest.mark.parametrize(
     "logprobs_mode",
     ["raw_logprobs", "raw_logits", "processed_logprobs", "processed_logits"])
diff --git a/vllm/config.py b/vllm/config.py
index 871df455ef58f..5c300e327397b 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -377,7 +377,8 @@ class ModelConfig:
     max_logprobs: int = 20
     """Maximum number of log probabilities to return when `logprobs` is
     specified in `SamplingParams`. The default value comes the default for the
-    OpenAI Chat Completions API."""
+    OpenAI Chat Completions API. -1 means no cap, i.e. all (output_length *
+    vocab_size) logprobs are allowed to be returned and it may cause OOM."""
     logprobs_mode: LogprobsMode = "raw_logprobs"
     """Indicates the content returned in the logprobs and prompt_logprobs.
     Supported mode:
@@ -1585,7 +1586,7 @@ class ModelConfig:
         """
         This method attempts to retrieve the non-default values of the
         generation config for this model.
-        
+
         The generation config can contain information about special tokens, as
         well as sampling parameters. Which is why this method exists separately
         to `get_diff_sampling_param`.
@@ -2066,7 +2067,7 @@ class ParallelConfig:
     and when data_parallel_size > 0. Enables running an AsyncLLM
     and API server on a "per-node" basis where vLLM load balances
     between local data parallel ranks, but an external LB balances
-    between vLLM nodes/replicas. Set explicitly in conjunction with 
+    between vLLM nodes/replicas. Set explicitly in conjunction with
     --data-parallel-start-rank."""
     enable_expert_parallel: bool = False
     """Use expert parallelism instead of tensor parallelism for MoE layers."""
diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 322e53b753948..52e4cbd096153 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -156,6 +156,7 @@ class SamplingParams(
             Note that the implementation follows the OpenAI API: The API will
             always return the log probability of the sampled token, so there
             may be up to `logprobs+1` elements in the response.
+            When set to -1, return all `vocab_size` log probabilities.
         prompt_logprobs: Number of log probabilities to return per prompt token.
         detokenize: Whether to detokenize the output. Defaults to True.
         skip_special_tokens: Whether to skip special tokens in the output.
@@ -414,9 +415,10 @@ class SamplingParams(
             raise ValueError(
                 f"min_tokens must be less than or equal to "
                 f"max_tokens={self.max_tokens}, got {self.min_tokens}.")
-        if self.logprobs is not None and self.logprobs < 0:
+        if (self.logprobs is not None and self.logprobs != -1
+                and self.logprobs < 0):
             raise ValueError(
-                f"logprobs must be non-negative, got {self.logprobs}.")
+                f"logprobs must be non-negative or -1, got {self.logprobs}.")
         if self.prompt_logprobs is not None and self.prompt_logprobs < 0:
             raise ValueError(f"prompt_logprobs must be non-negative, got "
                              f"{self.prompt_logprobs}.")
diff --git a/vllm/v1/engine/logprobs.py b/vllm/v1/engine/logprobs.py
index e95da0a5e5aaf..3de7fa6889e55 100644
--- a/vllm/v1/engine/logprobs.py
+++ b/vllm/v1/engine/logprobs.py
@@ -138,7 +138,7 @@ class LogprobsProcessor:
 
     def pop_prompt_logprobs(self) -> Optional[PromptLogprobs]:
         """Pop and return all request prompt logprobs
-        
+
         The logprobs processor aggregates prompt chunk logprobs
         over one or more prefill chunks. This method returns
         all prompt logprobs at once and then forgets them.
@@ -176,7 +176,8 @@ class LogprobsProcessor:
         Returns:
           dict[token id, Logprob]
         """
-
+        if num_logprobs == -1:
+            num_logprobs = len(logprobs)
         # We do not need a special case for the sampled token
         # being in the topk, since inserting duplicated data
         # into a dictionary twice is the same as doing it once.
diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
index 224acc47feb27..692a7dd5640e0 100644
--- a/vllm/v1/engine/processor.py
+++ b/vllm/v1/engine/processor.py
@@ -65,8 +65,11 @@ class Processor:
         params: SamplingParams,
     ) -> None:
         max_logprobs = self.model_config.max_logprobs
+        if max_logprobs == -1:
+            return
         # Validate sample logprobs.
-        if params.logprobs and params.logprobs > max_logprobs:
+        if params.logprobs and (params.logprobs == -1
+                                or params.logprobs > max_logprobs):
             raise ValueError(
                 f"Requested sample logprobs of {params.logprobs}, "
                 f"which is greater than max allowed: {max_logprobs}")
diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py
index c63041600f388..d9d0b4bec871a 100644
--- a/vllm/v1/worker/gpu_input_batch.py
+++ b/vllm/v1/worker/gpu_input_batch.py
@@ -337,7 +337,9 @@ class InputBatch:
                 self.generators[req_index] = request.generator
 
             if sampling_params.logprobs is not None:
-                self.num_logprobs[req_id] = sampling_params.logprobs
+                self.num_logprobs[req_id] = (self.vocab_size
+                                             if sampling_params.logprobs == -1
+                                             else sampling_params.logprobs)
             if sampling_params.prompt_logprobs is not None:
                 self.num_prompt_logprobs[
                     req_id] = sampling_params.prompt_logprobs

From 1539ced93a1ac3a78bef57d362cb9707c52f2a29 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Mon, 4 Aug 2025 18:37:06 +0800
Subject: [PATCH 193/224] [Doc] Update pooling model docs (#22186)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 docs/models/pooling_models.md   |  2 +-
 docs/models/supported_models.md | 69 ++++++++++++++++++++-------------
 2 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/docs/models/pooling_models.md b/docs/models/pooling_models.md
index 1fbbba7ace5e1..c6588363b63fb 100644
--- a/docs/models/pooling_models.md
+++ b/docs/models/pooling_models.md
@@ -120,7 +120,7 @@ A code example can be found here: <gh-file:examples/offline_inference/basic/clas
 ### `LLM.score`
 
 The [score][vllm.LLM.score] method outputs similarity scores between sentence pairs.
-It is designed for embedding models and cross encoder models. Embedding models use cosine similarity, and [cross-encoder models](https://www.sbert.net/examples/applications/cross-encoder/README.html) serve as rerankers between candidate query-document pairs in RAG systems.
+It is designed for embedding models and cross-encoder models. Embedding models use cosine similarity, and [cross-encoder models](https://www.sbert.net/examples/applications/cross-encoder/README.html) serve as rerankers between candidate query-document pairs in RAG systems.
 
 !!! note
     vLLM can only perform the model inference component (e.g. embedding, reranking) of RAG.
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index cd1228836b870..be3d51a025edf 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -311,6 +311,8 @@ See [this page](generative_models.md) for more information on how to use generat
 
 #### Text Generation
 
+These models primarily support the [`LLM.generate`](./generative_models.md#llmgenerate) API. Chat/Instruct models additionally support the [`LLM.chat`](./generative_models.md#llmchat) API.
+
 <style>
 th {
   white-space: nowrap;
@@ -419,7 +421,9 @@ See [this page](./pooling_models.md) for more information on how to use pooling
     Since some model architectures support both generative and pooling tasks,
     you should explicitly specify `--runner pooling` to ensure that the model is used in pooling mode instead of generative mode.
 
-#### Text Embedding
+#### Embedding
+
+These models primarily support the [`LLM.embed`](./pooling_models.md#llmembed) API.
 
 | Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/distributed_serving.md) | [V1](gh-issue:8779) |
 |--------------|--------|-------------------|----------------------|---------------------------|---------------------|
@@ -457,28 +461,10 @@ If your model is not in the above list, we will try to automatically convert the
 [as_embedding_model][vllm.model_executor.models.adapters.as_embedding_model]. By default, the embeddings
 of the whole prompt are extracted from the normalized hidden state corresponding to the last token.
 
-#### Reward Modeling
-
-| Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/distributed_serving.md) | [V1](gh-issue:8779) |
-|--------------|--------|-------------------|----------------------|---------------------------|---------------------|
-| `InternLM2ForRewardModel` | InternLM2-based | `internlm/internlm2-1_8b-reward`, `internlm/internlm2-7b-reward`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `LlamaForCausalLM`<sup>C</sup> | Llama-based | `peiyi9979/math-shepherd-mistral-7b-prm`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `Qwen2ForRewardModel` | Qwen2-based | `Qwen/Qwen2.5-Math-RM-72B`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `Qwen2ForProcessRewardModel` | Qwen2-based | `Qwen/Qwen2.5-Math-PRM-7B`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `*Model`<sup>C</sup>, `*ForCausalLM`<sup>C</sup>, etc. | Generative models | N/A | \* | \* | \* |
-
-<sup>C</sup> Automatically converted into a reward model via `--convert reward`. ([details](./pooling_models.md#model-conversion))  
-\* Feature support is the same as that of the original model.
-
-If your model is not in the above list, we will try to automatically convert the model using
-[as_reward_model][vllm.model_executor.models.adapters.as_reward_model]. By default, we return the hidden states of each token directly.
-
-!!! important
-    For process-supervised reward models such as `peiyi9979/math-shepherd-mistral-7b-prm`, the pooling config should be set explicitly,
-    e.g.: `--override-pooler-config '{"pooling_type": "STEP", "step_tag_id": 123, "returned_token_ids": [456, 789]}'`.
-
 #### Classification
 
+These models primarily support the [`LLM.classify`](./pooling_models.md#llmclassify) API.
+
 | Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/distributed_serving.md) | [V1](gh-issue:8779) |
 |--------------|--------|-------------------|----------------------|---------------------------|---------------------|
 | `JambaForSequenceClassification` | Jamba | `ai21labs/Jamba-tiny-reward-dev`, etc. | ✅︎ | ✅︎ | |
@@ -491,7 +477,10 @@ If your model is not in the above list, we will try to automatically convert the
 If your model is not in the above list, we will try to automatically convert the model using
 [as_seq_cls_model][vllm.model_executor.models.adapters.as_seq_cls_model]. By default, the class probabilities are extracted from the softmaxed hidden state corresponding to the last token.
 
-#### Sentence Pair Scoring
+#### Cross-encoder / Reranker
+
+Cross-encoder and reranker models are a subset of classification models that accept two prompts as input.
+These models primarily support the [`LLM.score`](./pooling_models.md#llmscore) API.
 
 | Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/distributed_serving.md) | [V1](gh-issue:8779) |
 |--------------|--------|-------------------|----------------------|---------------------------|---------------------|
@@ -501,6 +490,7 @@ If your model is not in the above list, we will try to automatically convert the
 | `Qwen3ForSequenceClassification` | Qwen3-based | `tomaarsen/Qwen3-Reranker-0.6B-seq-cls`, `Qwen/Qwen3-Reranker-0.6B` (see note), etc. | ✅︎ | ✅︎ | ✅︎ |
 | `RobertaForSequenceClassification` | RoBERTa-based | `cross-encoder/quora-roberta-base`, etc. | | | |
 | `XLMRobertaForSequenceClassification` | XLM-RoBERTa-based | `BAAI/bge-reranker-v2-m3`, etc. | | | |
+| `*Model`<sup>C</sup>, `*ForCausalLM`<sup>C</sup>, etc. | Generative models | N/A | \* | \* | \* |
 
 <sup>C</sup> Automatically converted into a classification model via `--convert classify`. ([details](./pooling_models.md#model-conversion))  
 \* Feature support is the same as that of the original model.
@@ -526,6 +516,28 @@ If your model is not in the above list, we will try to automatically convert the
     vllm serve Qwen/Qwen3-Reranker-0.6B --hf_overrides '{"architectures": ["Qwen3ForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}'
     ```
 
+#### Reward Modeling
+
+These models primarily support the [`LLM.reward`](./pooling_models.md#llmreward) API.
+
+| Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/distributed_serving.md) | [V1](gh-issue:8779) |
+|--------------|--------|-------------------|----------------------|---------------------------|---------------------|
+| `InternLM2ForRewardModel` | InternLM2-based | `internlm/internlm2-1_8b-reward`, `internlm/internlm2-7b-reward`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `LlamaForCausalLM`<sup>C</sup> | Llama-based | `peiyi9979/math-shepherd-mistral-7b-prm`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Qwen2ForRewardModel` | Qwen2-based | `Qwen/Qwen2.5-Math-RM-72B`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Qwen2ForProcessRewardModel` | Qwen2-based | `Qwen/Qwen2.5-Math-PRM-7B`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `*Model`<sup>C</sup>, `*ForCausalLM`<sup>C</sup>, etc. | Generative models | N/A | \* | \* | \* |
+
+<sup>C</sup> Automatically converted into a reward model via `--convert reward`. ([details](./pooling_models.md#model-conversion))  
+\* Feature support is the same as that of the original model.
+
+If your model is not in the above list, we will try to automatically convert the model using
+[as_reward_model][vllm.model_executor.models.adapters.as_reward_model]. By default, we return the hidden states of each token directly.
+
+!!! important
+    For process-supervised reward models such as `peiyi9979/math-shepherd-mistral-7b-prm`, the pooling config should be set explicitly,
+    e.g.: `--override-pooler-config '{"pooling_type": "STEP", "step_tag_id": 123, "returned_token_ids": [456, 789]}'`.
+
 [](){ #supported-mm-models }
 
 ## List of Multimodal Language Models
@@ -579,6 +591,8 @@ See [this page](generative_models.md) for more information on how to use generat
 
 #### Text Generation
 
+These models primarily support the [`LLM.generate`](./generative_models.md#llmgenerate) API. Chat/Instruct models additionally support the [`LLM.chat`](./generative_models.md#llmchat) API.
+
 | Architecture | Models | Inputs | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/distributed_serving.md) | [V1](gh-issue:8779) |
 |--------------|--------|--------|-------------------|----------------------|---------------------------|---------------------|
 | `AriaForConditionalGeneration` | Aria | T + I<sup>+</sup> | `rhymes-ai/Aria` | | | ✅︎ |
@@ -720,11 +734,9 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
 
 See [this page](./pooling_models.md) for more information on how to use pooling models.
 
-!!! important
-    Since some model architectures support both generative and pooling tasks,
-    you should explicitly specify `--runner pooling` to ensure that the model is used in pooling mode instead of generative mode.
+#### Embedding
 
-#### Text Embedding
+These models primarily support the [`LLM.embed`](./pooling_models.md#llmembed) API.
 
 !!! note
     To get the best results, you should use pooling models that are specifically trained as such.
@@ -742,7 +754,10 @@ The following table lists those that are tested in vLLM.
 
 ---
 
-#### Scoring
+#### Cross-encoder / Reranker
+
+Cross-encoder and reranker models are a subset of classification models that accept two prompts as input.
+These models primarily support the [`LLM.score`](./pooling_models.md#llmscore) API.
 
 | Architecture                        | Models             | Inputs   | Example HF Models        | [LoRA][lora-adapter]   | [PP][distributed-serving]   | [V1](gh-issue:8779)   |
 |-------------------------------------|--------------------|----------|--------------------------|------------------------|-----------------------------|-----------------------|

From a5fff3bd49a5ea888cf0dbdfe7ecf140455fa8d4 Mon Sep 17 00:00:00 2001
From: Raghav Ravishankar <113712354+alyosha-swamy@users.noreply.github.com>
Date: Mon, 4 Aug 2025 16:39:56 +0530
Subject: [PATCH 194/224] Fix Arcee model weight loading: Add custom
 load_weights (#21725)

Signed-off-by: alyosha-swamy <raghav@arcee.ai>
---
 tests/models/registry.py            |  3 +-
 vllm/model_executor/models/arcee.py | 83 +++++++++++++++++++++++++++--
 2 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/tests/models/registry.py b/tests/models/registry.py
index ffa6b755adf43..d86bd20fb0e34 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -139,8 +139,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                    trust_remote_code=True),
     "AquilaForCausalLM": _HfExamplesInfo("BAAI/AquilaChat2-7B",
                                          trust_remote_code=True),
-    "ArceeForCausalLM": _HfExamplesInfo("arcee-ai/AFM-4.5B-Base",
-                                        is_available_online=False),
+    "ArceeForCausalLM": _HfExamplesInfo("arcee-ai/AFM-4.5B-Base"),
     "ArcticForCausalLM": _HfExamplesInfo("Snowflake/snowflake-arctic-instruct",
                                          trust_remote_code=True),
     "BaiChuanForCausalLM": _HfExamplesInfo("baichuan-inc/Baichuan-7B",
diff --git a/vllm/model_executor/models/arcee.py b/vllm/model_executor/models/arcee.py
index 4e3ba107ba7e0..4cf73e2e0ea56 100644
--- a/vllm/model_executor/models/arcee.py
+++ b/vllm/model_executor/models/arcee.py
@@ -24,10 +24,12 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.sequence import IntermediateTensors
 
 from .interfaces import SupportsLoRA, SupportsPP
-from .utils import (AutoWeightsLoader, PPMissingLayer,
+from .utils import (AutoWeightsLoader, PPMissingLayer, is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers)
 
 
@@ -260,6 +262,81 @@ class ArceeModel(nn.Module):
             return hidden_states, aux_hidden_states
         return hidden_states
 
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
+        """Load weights, mapping q/k/v projections to fused qkv_proj."""
+        stacked_params_mapping = [
+            (".qkv_proj", ".q_proj", "q"),
+            (".qkv_proj", ".k_proj", "k"),
+            (".qkv_proj", ".v_proj", "v"),
+        ]
+
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+
+        for name, loaded_weight in weights:
+            if "rotary_emb.inv_freq" in name:
+                continue
+            if ("rotary_emb.cos_cached" in name
+                    or "rotary_emb.sin_cached" in name):
+                continue
+
+            if (self.quant_config is not None and
+                (scale_name := self.quant_config.get_cache_scale(name))):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader",
+                                        default_weight_loader)
+                loaded_weight = (loaded_weight if loaded_weight.dim() == 0 else
+                                 loaded_weight[0])
+                weight_loader(param, loaded_weight)
+                loaded_params.add(scale_name)
+                continue
+
+            if "scale" in name:
+                remapped_name = maybe_remap_kv_scale_name(name, params_dict)
+                if remapped_name is None:
+                    continue
+                name = remapped_name
+
+            mapped = False
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+
+                name = name.replace(weight_name, param_name)
+
+                if name.endswith(".bias") and name not in params_dict:
+                    mapped = True
+                    break
+
+                if is_pp_missing_parameter(name, self):
+                    mapped = True
+                    break
+
+                param = params_dict[name]
+                weight_loader = param.weight_loader  # type: ignore[attr-defined]
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name)
+                mapped = True
+                break
+
+            if mapped:
+                continue
+
+            if name.endswith(".bias") and name not in params_dict:
+                continue
+
+            if is_pp_missing_parameter(name, self):
+                continue
+
+            param = params_dict[name]
+            weight_loader = getattr(param, "weight_loader",
+                                    default_weight_loader)
+            weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+
+        return loaded_params
+
 
 class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
     """Arcee Model for causal language modeling, integrated with vLLM
@@ -304,8 +381,7 @@ class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
         else:
             # Placeholder for lm_head on non-last ranks
             self.lm_head = PPMissingLayer()
-        # Provide a reference to the model's method for generating empty
-        # tensors (used in pipeline parallel schedule)
+
         self.make_empty_intermediate_tensors = (
             self.model.make_empty_intermediate_tensors)
 
@@ -316,7 +392,6 @@ class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
         intermediate_tensors: Optional[IntermediateTensors] = None,
         inputs_embeds: Optional[torch.Tensor] = None
     ) -> Union[torch.Tensor, IntermediateTensors]:
-        # Forward pass through the Arcee model backbone
         model_output = self.model(input_ids=input_ids,
                                   positions=positions,
                                   intermediate_tensors=intermediate_tensors,

From 9af654cc38c74cd51b00c609eaa290e495f225e1 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Mon, 4 Aug 2025 05:12:48 -0700
Subject: [PATCH 195/224] [Responses API] Ignore `store=True` and process the
 request by default (#22185)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
---
 vllm/entrypoints/openai/serving_responses.py | 31 ++++++++++++++++++--
 vllm/envs.py                                 |  3 +-
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 5e9401cbd7473..e009529fbd2ad 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -90,8 +90,17 @@ class OpenAIServingResponses(OpenAIServing):
             logger.info("Using default chat sampling params from %s: %s",
                         source, self.default_sampling_params)
 
-        # False by default.
+        # If False (default), the "store" option is (silently) ignored and the
+        # response is not stored. If True, the response is stored in memory.
+        # NOTE(woosuk): This may not be intuitive for users, as the default
+        # behavior in OpenAI's Responses API is to store the response, but
+        # vLLM's default behavior is to not store it.
         self.enable_store = envs.VLLM_ENABLE_RESPONSES_API_STORE
+        if self.enable_store:
+            logger.warning_once(
+                "`VLLM_ENABLE_RESPONSES_API_STORE` is enabled. This may "
+                "cause a memory leak since we never remove responses from "
+                "the store.")
         # HACK(woosuk): This is a hack. We should use a better store.
         # FIXME: If enable_store=True, this may cause a memory leak since we
         # never remove responses from the store.
@@ -121,9 +130,25 @@ class OpenAIServingResponses(OpenAIServing):
         if self.engine_client.errored:
             raise self.engine_client.dead_error
 
-        # If store is not enabled, return an error.
         if request.store and not self.enable_store:
-            return self._make_store_not_supported_error()
+            if request.background:
+                return self.create_error_response(
+                    err_type="invalid_request_error",
+                    message=(
+                        "This vLLM engine does not support `store=True` and "
+                        "therefore does not support the background mode. To "
+                        "enable these features, set the environment variable "
+                        "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
+                        "the vLLM server."),
+                    status_code=HTTPStatus.BAD_REQUEST,
+                )
+            # Disable the store option.
+            # NOTE(woosuk): Although returning an error is possible, we opted
+            # to implicitly disable store and process the request anyway, as
+            # we assume most users do not intend to actually store the response
+            # (i.e., their request has `store=True` only because it is the
+            # default value).
+            request.store = False
 
         # Handle the previous response ID.
         prev_response_id = request.previous_response_id
diff --git a/vllm/envs.py b/vllm/envs.py
index 8d3c7eab471cf..78f955f78a987 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -1060,7 +1060,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
 
     # Enables support for the "store" option in the OpenAI Responses API.
     # When set to 1, vLLM's OpenAI server will retain the input and output
-    # messages for those requests in memory. By default, this is disabled (0).
+    # messages for those requests in memory. By default, this is disabled (0),
+    # and the "store" option is ignored.
     # NOTE/WARNING:
     # 1. Messages are kept in memory only (not persisted to disk) and will be
     #    lost when the vLLM server shuts down.

From 309c1bb822c94436e8beff60d68404b4cecd62b8 Mon Sep 17 00:00:00 2001
From: ericehanley <ericehanley@google.com>
Date: Mon, 4 Aug 2025 10:12:06 -0500
Subject: [PATCH 196/224] [Bug] Update auto_tune.sh to separate benchmarking
 and profiling. (#21629)

Signed-off-by: Eric Hanley <ericehanley@google.com>
---
 benchmarks/auto_tune/auto_tune.sh | 123 +++++++++++++++++++-----------
 1 file changed, 80 insertions(+), 43 deletions(-)

diff --git a/benchmarks/auto_tune/auto_tune.sh b/benchmarks/auto_tune/auto_tune.sh
index df26376504b95..82c20ffa6554c 100644
--- a/benchmarks/auto_tune/auto_tune.sh
+++ b/benchmarks/auto_tune/auto_tune.sh
@@ -49,6 +49,7 @@ best_throughput=0
 best_max_num_seqs=0
 best_num_batched_tokens=0
 best_goodput=0
+best_request_rate=0
 
 start_server() {
     local gpu_memory_utilization=$1
@@ -57,18 +58,35 @@ start_server() {
     local vllm_log=$4
     local profile_dir=$5
 
-    pkill -f vllm
+    pkill -if vllm
 
-    VLLM_USE_V1=1 VLLM_SERVER_DEV_MODE=1 VLLM_TORCH_PROFILER_DIR=$profile_dir vllm serve $MODEL \
-        --port 8004 \
-        --gpu-memory-utilization $gpu_memory_utilization \
-        --max-num-seqs $max_num_seqs \
-        --max-num-batched-tokens $max_num_batched_tokens \
-        --tensor-parallel-size $TP \
-        --enable-prefix-caching \
-        --load-format dummy \
-        --download-dir "$DOWNLOAD_DIR" \
-        --max-model-len $MAX_MODEL_LEN > "$vllm_log" 2>&1 &
+    # Define the common arguments as a bash array.
+    # Each argument and its value are separate elements.
+    local common_args_array=(
+        "$MODEL"
+        "--disable-log-requests"
+        "--port" "8004"
+        "--gpu-memory-utilization" "$gpu_memory_utilization"
+        "--max-num-seqs" "$max_num_seqs"
+        "--max-num-batched-tokens" "$max_num_batched_tokens"
+        "--tensor-parallel-size" "$TP"
+        "--enable-prefix-caching"
+        "--load-format" "dummy"
+        "--download-dir" "$DOWNLOAD_DIR"
+        "--max-model-len" "$MAX_MODEL_LEN"
+    )
+
+    # Use the array expansion "${common_args_array[@]}"
+    # This correctly passes each element as a separate argument.
+    if [[ -n "$profile_dir" ]]; then
+        # Start server with profiling enabled
+        VLLM_USE_V1=1 VLLM_SERVER_DEV_MODE=1 VLLM_TORCH_PROFILER_DIR=$profile_dir \
+            vllm serve "${common_args_array[@]}" > "$vllm_log" 2>&1 &
+    else
+        # Start server without profiling
+        VLLM_USE_V1=1 VLLM_SERVER_DEV_MODE=1 \
+            vllm serve "${common_args_array[@]}" > "$vllm_log" 2>&1 &
+    fi
 
     # wait for 10 minutes...
     server_started=0
@@ -82,6 +100,7 @@ start_server() {
             sleep 10
         fi
     done
+
     if (( ! server_started )); then
         echo "server did not start within 10 minutes. Please check server log at $vllm_log".
         return 1
@@ -90,37 +109,20 @@ start_server() {
     fi
 }
 
-update_best_profile() {
-    local profile_dir=$1
-    local profile_index=$2
-    sorted_paths=($(find "$profile_dir" -maxdepth 1 -not -path "$profile_dir" | sort))
-    selected_profile_file=
-    if [[ "$SYSTEM" == "TPU" ]]; then
-        selected_profile_file="${sorted_paths[$profile_index]}/*.xplane.pb"
-    fi
-    if [[ "$SYSTEM" == "GPU" ]]; then
-        selected_profile_file="${sorted_paths[$profile_index]}"
-    fi
-    rm -f $PROFILE_PATH/*
-    cp $selected_profile_file $PROFILE_PATH
-}
-
 run_benchmark() {
     local max_num_seqs=$1
     local max_num_batched_tokens=$2
     local gpu_memory_utilization=$3
     echo "max_num_seq: $max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens"
     local vllm_log="$LOG_FOLDER/vllm_log_${max_num_seqs}_${max_num_batched_tokens}.txt"
-    local profile_dir="$LOG_FOLDER/profile_${max_num_seqs}_${max_num_batched_tokens}"
     echo "vllm_log: $vllm_log"
     echo
     rm -f $vllm_log
-    mkdir -p $profile_dir
-    pkill -f vllm
-    local profile_index=0
+    pkill -if vllm
 
     echo "starting server..."
-    start_server $gpu_memory_utilization $max_num_seqs $max_num_batched_tokens $vllm_log $profile_dir
+    # Call start_server without a profile_dir to avoid profiling overhead
+    start_server $gpu_memory_utilization $max_num_seqs $max_num_batched_tokens $vllm_log ""
     result=$?
     if [[ "$result" -eq 1 ]]; then
         echo "server failed to start. gpu_memory_utilization:$gpu_memory_utilization, max_num_seqs:$max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens"
@@ -134,7 +136,8 @@ run_benchmark() {
     # get a basic qps by using request-rate inf
     bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_inf.txt"
     prefix_len=$(( INPUT_LEN * MIN_CACHE_HIT_PCT / 100 ))
-adjusted_input_len=$(( INPUT_LEN - prefix_len ))
+    adjusted_input_len=$(( INPUT_LEN - prefix_len ))
+    # --profile flag is removed from this call
     vllm bench serve \
         --backend vllm \
         --model $MODEL  \
@@ -148,8 +151,7 @@ adjusted_input_len=$(( INPUT_LEN - prefix_len ))
         --goodput e2el:$MAX_LATENCY_ALLOWED_MS \
         --num-prompts 1000 \
         --random-prefix-len $prefix_len \
-        --port 8004 \
-        --profile &> "$bm_log"
+        --port 8004 &> "$bm_log"
     throughput=$(grep "Request throughput (req/s):" "$bm_log" | sed 's/[^0-9.]//g')
     e2el=$(grep "P99 E2EL (ms):" "$bm_log" | awk '{print $NF}')
     goodput=$(grep "Request goodput (req/s):" "$bm_log" | sed 's/[^0-9.]//g')
@@ -163,7 +165,6 @@ adjusted_input_len=$(( INPUT_LEN - prefix_len ))
     # start from request-rate as int(throughput) + 1
         request_rate=$((${throughput%.*} + 1))
         while ((request_rate > 0)); do
-            profile_index=$((profile_index+1))
             # clear prefix cache
             curl -X POST http://0.0.0.0:8004/reset_prefix_cache
             sleep 5
@@ -201,12 +202,7 @@ adjusted_input_len=$(( INPUT_LEN - prefix_len ))
             best_max_num_seqs=$max_num_seqs
             best_num_batched_tokens=$max_num_batched_tokens
             best_goodput=$goodput
-            if [[ "$SYSTEM" == "TPU" ]]; then
-                update_best_profile "$profile_dir/plugins/profile" $profile_index
-            fi
-            if [[ "$SYSTEM" == "GPU" ]]; then
-                update_best_profile "$profile_dir" $profile_index
-            fi
+            best_request_rate=$request_rate
         fi
     else
         echo "max_num_seqs: $max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens does not meet latency requirement ${MAX_LATENCY_ALLOWED_MS}"
@@ -215,7 +211,7 @@ adjusted_input_len=$(( INPUT_LEN - prefix_len ))
 
     echo "best_max_num_seqs: $best_max_num_seqs, best_num_batched_tokens: $best_num_batched_tokens, best_throughput: $best_throughput"
 
-    pkill vllm
+    pkill -if vllm
     sleep 10
     printf '=%.0s' $(seq 1 20)
     return 0
@@ -228,7 +224,8 @@ read -r -a num_batched_tokens_list <<< "$NUM_BATCHED_TOKENS_LIST"
 gpu_memory_utilization=0.98
 find_gpu_memory_utilization=0
 while (( $(echo "$gpu_memory_utilization >= 0.9" | bc -l) )); do
-    start_server $gpu_memory_utilization "${num_seqs_list[-1]}" "${num_batched_tokens_list[-1]}" "$LOG_FOLDER/vllm_log_gpu_memory_utilization_$gpu_memory_utilization.log"
+    # Pass empty string for profile_dir argument
+    start_server $gpu_memory_utilization "${num_seqs_list[-1]}" "${num_batched_tokens_list[-1]}" "$LOG_FOLDER/vllm_log_gpu_memory_utilization_$gpu_memory_utilization.log" ""
     result=$?
     if [[ "$result" -eq 0 ]]; then
         find_gpu_memory_utilization=1
@@ -251,5 +248,45 @@ for num_seqs in "${num_seqs_list[@]}"; do
     done
 done
 echo "finish permutations"
+
+# =================================================================================
+# FINAL PROFILING RUN FOR THE BEST CONFIGURATION
+# =================================================================================
+if (( $(echo "$best_throughput > 0" | bc -l) )); then
+    echo
+    echo "Benchmark tuning finished. Now running profiling on the best configuration found..."
+    echo "Best config: max_num_seqs: $best_max_num_seqs, max_num_batched_tokens: $best_num_batched_tokens, throughput: $best_throughput"
+    echo
+
+    vllm_log="$LOG_FOLDER/vllm_log_BEST_PROFILE.txt"
+    bm_log="$LOG_FOLDER/bm_log_BEST_PROFILE.txt"
+
+    # Start server with the best params and profiling ENABLED
+    echo "Starting server for profiling..."
+    start_server $gpu_memory_utilization $best_max_num_seqs $best_num_batched_tokens "$vllm_log" "$PROFILE_PATH"
+
+    # Run benchmark with the best params and the --profile flag
+    echo "Running benchmark with profiling..."
+    prefix_len=$(( INPUT_LEN * MIN_CACHE_HIT_PCT / 100 ))
+    adjusted_input_len=$(( INPUT_LEN - prefix_len ))
+    vllm bench serve \
+        --backend vllm \
+        --model $MODEL \
+        --dataset-name random \
+        --random-input-len $adjusted_input_len \
+        --random-output-len $OUTPUT_LEN \
+        --ignore-eos \
+        --disable-tqdm \
+        --request-rate $best_request_rate \
+        --percentile-metrics ttft,tpot,itl,e2el \
+        --goodput e2el:$MAX_LATENCY_ALLOWED_MS \
+        --num-prompts 100 \
+        --random-prefix-len $prefix_len \
+        --port 8004 \
+        --profile &> "$bm_log"
+else
+    echo "No configuration met the latency requirements. Skipping final profiling run."
+fi
+pkill -if vllm
 echo "best_max_num_seqs: $best_max_num_seqs, best_num_batched_tokens: $best_num_batched_tokens, best_throughput: $best_throughput, profile saved in: $PROFILE_PATH"
 echo "best_max_num_seqs: $best_max_num_seqs, best_num_batched_tokens: $best_num_batched_tokens, best_throughput: $best_throughput, profile saved in: $PROFILE_PATH" >> "$RESULT"

From c09efff9767ad26ecf99a6e6c13243612c278df3 Mon Sep 17 00:00:00 2001
From: Zhonghua Deng <abzhonghua@gmail.com>
Date: Tue, 5 Aug 2025 04:17:05 +0800
Subject: [PATCH 197/224] [Bugfix][V1][P/D] Fix the uneven polling issue in the
 toy proxy for P2pNcclConnector (#21819)

Signed-off-by: Abatom <abzhonghua@gmail.com>
---
 .../disagg_proxy_p2p_nccl_xpyd.py                            | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
index 73da7af85f1d9..0c7d32d7862e3 100644
--- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
+++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
@@ -46,7 +46,7 @@ def _listen_for_register(poller, router_socket):
                 global prefill_instances
                 global prefill_cv
                 with prefill_cv:
-                    node = prefill_instances.pop(data["http_address"], None)
+                    node = prefill_instances.get(data["http_address"], None)
                     prefill_instances[data["http_address"]] = (
                         data["zmq_address"],
                         time.time() + DEFAULT_PING_SECONDS,
@@ -57,7 +57,7 @@ def _listen_for_register(poller, router_socket):
                 global decode_instances
                 global decode_cv
                 with decode_cv:
-                    node = decode_instances.pop(data["http_address"], None)
+                    node = decode_instances.get(data["http_address"], None)
                     decode_instances[data["http_address"]] = (
                         data["zmq_address"],
                         time.time() + DEFAULT_PING_SECONDS,
@@ -69,6 +69,7 @@ def _listen_for_register(poller, router_socket):
                     remote_address,
                     data,
                 )
+                return
 
             if node is None:
                 print(f"🔵Add [HTTP:{data['http_address']}, ZMQ:{data['zmq_address']}]")

From bdcb42e45db5cbbc02b0f69ac304c87d7a8cb6b6 Mon Sep 17 00:00:00 2001
From: "Po-Han Huang (NVIDIA)" <53919306+nvpohanh@users.noreply.github.com>
Date: Tue, 5 Aug 2025 09:02:55 +0800
Subject: [PATCH 198/224] [NVIDIA] Auto detect modelopt quant and fix DSR1-FP4
 weight loading (#22073)

---
 vllm/config.py                                | 15 ++++++
 vllm/model_executor/layers/fused_moe/layer.py | 53 +++++++++++++------
 vllm/transformers_utils/config.py             | 14 +++++
 3 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 5c300e327397b..dd59526471782 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1108,6 +1108,21 @@ class ModelConfig:
         if quant_cfg is None:
             # compressed-tensors uses a "compression_config" key
             quant_cfg = getattr(self.hf_config, "compression_config", None)
+
+        else:
+            # Set quant_method for ModelOpt models.
+            producer_name = quant_cfg.get("producer", {}).get("name")
+            if producer_name == "modelopt":
+                quant_algo = quant_cfg.get("quantization",
+                                           {}).get("quant_algo")
+                if quant_algo == "FP8":
+                    quant_cfg["quant_method"] = "modelopt"
+                elif quant_algo == "NVFP4":
+                    quant_cfg["quant_method"] = "modelopt_fp4"
+                elif quant_algo is not None:
+                    raise ValueError(
+                        f"Unknown ModelOpt quant algo: {quant_algo}")
+
         return quant_cfg
 
     def _verify_quantization(self) -> None:
diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 9e7296feeae1e..f155a1b11fbff 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -919,9 +919,13 @@ class FusedMoE(torch.nn.Module):
         elif shard_id == "w2":
             param_data[expert_id] = loaded_weight
 
-    def _load_w13_weight_scale(self, shard_dim: int,
-                               loaded_weight: torch.Tensor,
-                               param: torch.Tensor, tp_rank: int):
+    def _load_combined_w13_weight_scale(self, shard_dim: int,
+                                        loaded_weight: torch.Tensor,
+                                        param: torch.Tensor, tp_rank: int):
+        """
+        Load w13 weight scales assuming that w1 weight scales and w3 weight
+        scales are stored in the same loaded_weight tensor.
+        """
         shard_size = param.shape[shard_dim]
         loaded_weight = loaded_weight.narrow(shard_dim, shard_size * tp_rank,
                                              shard_size)
@@ -1168,24 +1172,43 @@ class FusedMoE(torch.nn.Module):
             uses_weight_scale_2 = self.quant_method.uses_weight_scale_2_pattern(
             )
 
-            # For per-tensor, FP4 uses "weight_scale_2", FP8 uses "weight_scale"
-            per_tensor_conditions = (
-                "weight_scale_2" in weight_name if uses_weight_scale_2 else
-                "weight_scale" in weight_name) or "input_scale" in weight_name
-
-            if "w13_weight_scale" in weight_name:
-                self._load_w13_weight_scale(shard_dim=shard_dim,
-                                            loaded_weight=loaded_weight,
-                                            param=param,
-                                            tp_rank=self.tp_rank)
-            elif per_tensor_conditions:
+            # Call _load_per_tensor_weight_scale() to load per-tensor (scalar)
+            # weights scales.
+            # Input scales are always per-tensor.
+            # Weight scales: FP4 uses "weight_scale_2" and FP8 uses
+            # "weight_scale" for per-tensor scales.
+            is_per_tensor = ("weight_scale_2" in weight_name
+                             if uses_weight_scale_2 else "weight_scale"
+                             in weight_name) or "input_scale" in weight_name
+            if is_per_tensor:
                 self._load_per_tensor_weight_scale(
                     shard_id=shard_id,
                     param=param,
                     loaded_weight=loaded_weight,
                     expert_id=expert_id,
                 )
-            elif "weight" in weight_name:
+                return True if return_success else None
+
+            # If the weight is w13_weight_scale and w13_weight_scales are
+            # combined into single loaded_weight, call
+            # _load_combined_w13_weight_scale() to load it.
+            # This is checked by comparing the hidden_out dims of the
+            # loaded_weight and the param.
+            if "w13_weight_scale" in weight_name:
+                loaded_weight_hidden_out = loaded_weight.shape[-2]
+                param_hidden_out = param.data.shape[-2] * self.tp_size
+                if loaded_weight_hidden_out == param_hidden_out:
+                    self._load_combined_w13_weight_scale(
+                        shard_dim=shard_dim,
+                        loaded_weight=loaded_weight,
+                        param=param,
+                        tp_rank=self.tp_rank,
+                    )
+                    return True if return_success else None
+
+            # For other weights, call _load_model_weight_or_group_weight_scale()
+            # to load it.
+            if "weight" in weight_name:
                 self._load_model_weight_or_group_weight_scale(
                     shard_id=shard_id,
                     shard_dim=shard_dim,
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index cc41a771d06c2..8fe153464d360 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -449,6 +449,20 @@ def get_config(
         model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
         config.update({"architectures": [model_type]})
 
+    # ModelOpt 0.31.0 and after saves the quantization config in the model
+    # config file.
+    quantization_config = config_dict.get("quantization_config", None)
+
+    # ModelOpt 0.29.0 and before saves the quantization config in a separate
+    # "hf_quant_config.json" in the same directory as the model config file.
+    if quantization_config is None \
+        and file_or_path_exists(model, "hf_quant_config.json", revision):
+        quantization_config = get_hf_file_to_dict("hf_quant_config.json",
+                                                  model, revision)
+
+    if quantization_config is not None:
+        config.quantization_config = quantization_config
+
     if hf_overrides_kw:
         logger.debug("Overriding HF config with %s", hf_overrides_kw)
         config.update(hf_overrides_kw)

From 2dffac464c82ac7c509c78f7d12a7c72ea765a63 Mon Sep 17 00:00:00 2001
From: PiteXChen <44110731+CLFutureX@users.noreply.github.com>
Date: Tue, 5 Aug 2025 09:34:10 +0800
Subject: [PATCH 199/224] [Bugfix] V1 Fix the cursor leakage issue during
 request scheduling. (#21173)

Signed-off-by: CLFutureX <775523362@qq.com>
---
 tests/v1/core/test_scheduler.py | 97 ++++++++++++++++++++++++++++++++-
 vllm/v1/core/sched/scheduler.py |  6 +-
 2 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py
index c719d1975bba2..3f82261a59a76 100644
--- a/tests/v1/core/test_scheduler.py
+++ b/tests/v1/core/test_scheduler.py
@@ -1307,13 +1307,18 @@ def create_requests_with_priority(
         mm_positions: Optional[list[PlaceholderRange]] = None,
         max_tokens: int = 16,
         stop_token_ids: Optional[list[int]] = None,
-        prompt_logprobs: Optional[int] = None):
+        prompt_logprobs: Optional[int] = None,
+        request_ids: Optional[list[str]] = None):
     """Create requests with specified priorities and arrival times."""
     assert len(priorities) == num_requests
     if arrival_times is not None:
         assert len(arrival_times) == num_requests
     else:
         arrival_times = [float(i) for i in range(num_requests)]
+    if request_ids is not None:
+        assert len(request_ids) == num_requests
+    else:
+        request_ids = [f"{i}" for i in range(num_requests)]
 
     sampling_params = SamplingParams(ignore_eos=False,
                                      max_tokens=max_tokens,
@@ -1328,7 +1333,7 @@ def create_requests_with_priority(
             mm_position = None
             mm_inputs = None
         request = Request(
-            request_id=f"{i}",
+            request_id=request_ids[i],
             prompt_token_ids=[i] * num_tokens,
             sampling_params=sampling_params,
             pooling_params=None,
@@ -1829,3 +1834,91 @@ def test_schedule_skip_tokenizer_init_structured_output_request():
     assert len(output.scheduled_new_reqs) == 0
     assert len(scheduler.running) == 0
     assert len(scheduler.waiting) == 1
+
+
+def test_priority_scheduling_preemption_victim_iterator_order():
+    """Test that the scheduling order is maintained after
+    preempting lower-priority requests."""
+    scheduler = create_scheduler_with_priority(
+        max_num_batched_tokens=200,
+        num_blocks=9,
+    )
+    # Add three priority requests first.
+    priority_requests = create_requests_with_priority(
+        num_requests=3,
+        priorities=[3, 4, 5],
+        arrival_times=[1.0, 2.0, 3.0],
+        num_tokens=15,
+        request_ids=["1", "2", "3"],
+    )
+
+    for request in priority_requests:
+        scheduler.add_request(request)
+    # After scheduling, transfer from the waiting queue to the running queue.
+    # At this time, 3 blocks have been allocated, and 5 available blocks remain.
+    output = scheduler.schedule()
+
+    model_output = ModelRunnerOutput(
+        req_ids=[req.request_id for req in priority_requests],
+        req_id_to_index={
+            req.request_id: i
+            for i, req in enumerate(priority_requests)
+        },
+        sampled_token_ids=[[15] for _ in priority_requests],
+        spec_token_ids=None,
+        logprobs=None,
+        prompt_logprobs_dict={},
+        pooler_output=[],
+    )
+    scheduler.update_from_output(output, model_output)
+
+    # Add tow high priority requests.
+    high_priority_requests = create_requests_with_priority(
+        num_requests=2,
+        priorities=[1, 2],
+        arrival_times=[4.0, 5.0],
+        num_tokens=16,
+        request_ids=["4", "5"],
+    )
+    for request in high_priority_requests:
+        scheduler.add_request(request)
+
+    # After scheduling, transfer the two high-priority requests from
+    # the waiting queue to the running queue.
+    # the IDs of the requests in the running queue are: 1, 2, 3, 4, 5.
+    # At this time, 3+2 blocks have been allocated,
+    # and 3 available blocks remain.
+    output = scheduler.schedule()
+
+    merge_requests = priority_requests + high_priority_requests
+
+    model_output = ModelRunnerOutput(
+        req_ids=[req.request_id for req in merge_requests],
+        req_id_to_index={
+            req.request_id: i
+            for i, req in enumerate(merge_requests)
+        },
+        sampled_token_ids=[[1] for _ in merge_requests],
+        spec_token_ids=None,
+        logprobs=None,
+        prompt_logprobs_dict={},
+        pooler_output=[],
+    )
+    scheduler.update_from_output(output, model_output)
+
+    # At this time, the request with the lowest priority
+    # (request.id = 2) will be preempted, freeing up 2 blocks,
+    # which exactly meets the resource allocation requirements
+    # for request.id = 4 and request.id = 5.
+    output = scheduler.schedule()
+
+    # Should schedule the new request without preemption.
+    assert len(scheduler.running) == 4  #
+    assert len(scheduler.waiting) == 1  #
+
+    running_priorities = [req.priority for req in scheduler.running]
+    running_req_ids = [req.request_id for req in scheduler.running]
+
+    assert running_priorities == [3, 4, 1, 2]
+    assert running_req_ids == ["1", "2", "4", "5"]
+    assert scheduler.waiting.peek_request().priority == 5
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index 49a744cfec69a..413a853dfecbc 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -257,7 +257,11 @@ class Scheduler(SchedulerInterface):
                             self.running,
                             key=lambda r: (r.priority, r.arrival_time),
                         )
-                        self.running.remove(preempted_req)
+                        preempted_index = self.running.index(preempted_req)
+                        if preempted_index <= req_index:
+                            req_index -= 1
+                            scheduled_running_reqs.remove(preempted_req)
+                        self.running.pop(preempted_index)
                     else:
                         preempted_req = self.running.pop()
 

From 7175817637bde6c668b75cce91c022e3a33b3684 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Mon, 4 Aug 2025 18:37:06 -0700
Subject: [PATCH 200/224] Revert "[Bugfix] V1 Fix the cursor leakage issue
 during request scheduling." (#22223)

---
 tests/v1/core/test_scheduler.py | 97 +--------------------------------
 vllm/v1/core/sched/scheduler.py |  6 +-
 2 files changed, 3 insertions(+), 100 deletions(-)

diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py
index 3f82261a59a76..c719d1975bba2 100644
--- a/tests/v1/core/test_scheduler.py
+++ b/tests/v1/core/test_scheduler.py
@@ -1307,18 +1307,13 @@ def create_requests_with_priority(
         mm_positions: Optional[list[PlaceholderRange]] = None,
         max_tokens: int = 16,
         stop_token_ids: Optional[list[int]] = None,
-        prompt_logprobs: Optional[int] = None,
-        request_ids: Optional[list[str]] = None):
+        prompt_logprobs: Optional[int] = None):
     """Create requests with specified priorities and arrival times."""
     assert len(priorities) == num_requests
     if arrival_times is not None:
         assert len(arrival_times) == num_requests
     else:
         arrival_times = [float(i) for i in range(num_requests)]
-    if request_ids is not None:
-        assert len(request_ids) == num_requests
-    else:
-        request_ids = [f"{i}" for i in range(num_requests)]
 
     sampling_params = SamplingParams(ignore_eos=False,
                                      max_tokens=max_tokens,
@@ -1333,7 +1328,7 @@ def create_requests_with_priority(
             mm_position = None
             mm_inputs = None
         request = Request(
-            request_id=request_ids[i],
+            request_id=f"{i}",
             prompt_token_ids=[i] * num_tokens,
             sampling_params=sampling_params,
             pooling_params=None,
@@ -1834,91 +1829,3 @@ def test_schedule_skip_tokenizer_init_structured_output_request():
     assert len(output.scheduled_new_reqs) == 0
     assert len(scheduler.running) == 0
     assert len(scheduler.waiting) == 1
-
-
-def test_priority_scheduling_preemption_victim_iterator_order():
-    """Test that the scheduling order is maintained after
-    preempting lower-priority requests."""
-    scheduler = create_scheduler_with_priority(
-        max_num_batched_tokens=200,
-        num_blocks=9,
-    )
-    # Add three priority requests first.
-    priority_requests = create_requests_with_priority(
-        num_requests=3,
-        priorities=[3, 4, 5],
-        arrival_times=[1.0, 2.0, 3.0],
-        num_tokens=15,
-        request_ids=["1", "2", "3"],
-    )
-
-    for request in priority_requests:
-        scheduler.add_request(request)
-    # After scheduling, transfer from the waiting queue to the running queue.
-    # At this time, 3 blocks have been allocated, and 5 available blocks remain.
-    output = scheduler.schedule()
-
-    model_output = ModelRunnerOutput(
-        req_ids=[req.request_id for req in priority_requests],
-        req_id_to_index={
-            req.request_id: i
-            for i, req in enumerate(priority_requests)
-        },
-        sampled_token_ids=[[15] for _ in priority_requests],
-        spec_token_ids=None,
-        logprobs=None,
-        prompt_logprobs_dict={},
-        pooler_output=[],
-    )
-    scheduler.update_from_output(output, model_output)
-
-    # Add tow high priority requests.
-    high_priority_requests = create_requests_with_priority(
-        num_requests=2,
-        priorities=[1, 2],
-        arrival_times=[4.0, 5.0],
-        num_tokens=16,
-        request_ids=["4", "5"],
-    )
-    for request in high_priority_requests:
-        scheduler.add_request(request)
-
-    # After scheduling, transfer the two high-priority requests from
-    # the waiting queue to the running queue.
-    # the IDs of the requests in the running queue are: 1, 2, 3, 4, 5.
-    # At this time, 3+2 blocks have been allocated,
-    # and 3 available blocks remain.
-    output = scheduler.schedule()
-
-    merge_requests = priority_requests + high_priority_requests
-
-    model_output = ModelRunnerOutput(
-        req_ids=[req.request_id for req in merge_requests],
-        req_id_to_index={
-            req.request_id: i
-            for i, req in enumerate(merge_requests)
-        },
-        sampled_token_ids=[[1] for _ in merge_requests],
-        spec_token_ids=None,
-        logprobs=None,
-        prompt_logprobs_dict={},
-        pooler_output=[],
-    )
-    scheduler.update_from_output(output, model_output)
-
-    # At this time, the request with the lowest priority
-    # (request.id = 2) will be preempted, freeing up 2 blocks,
-    # which exactly meets the resource allocation requirements
-    # for request.id = 4 and request.id = 5.
-    output = scheduler.schedule()
-
-    # Should schedule the new request without preemption.
-    assert len(scheduler.running) == 4  #
-    assert len(scheduler.waiting) == 1  #
-
-    running_priorities = [req.priority for req in scheduler.running]
-    running_req_ids = [req.request_id for req in scheduler.running]
-
-    assert running_priorities == [3, 4, 1, 2]
-    assert running_req_ids == ["1", "2", "4", "5"]
-    assert scheduler.waiting.peek_request().priority == 5
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index 413a853dfecbc..49a744cfec69a 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -257,11 +257,7 @@ class Scheduler(SchedulerInterface):
                             self.running,
                             key=lambda r: (r.priority, r.arrival_time),
                         )
-                        preempted_index = self.running.index(preempted_req)
-                        if preempted_index <= req_index:
-                            req_index -= 1
-                            scheduled_running_reqs.remove(preempted_req)
-                        self.running.pop(preempted_index)
+                        self.running.remove(preempted_req)
                     else:
                         preempted_req = self.running.pop()
 

From 5ea71ff46fe503df12f18ad41d40f5c2b18dcfcd Mon Sep 17 00:00:00 2001
From: Giancarlo Delfin <32987265+TheEpicDolphin@users.noreply.github.com>
Date: Mon, 4 Aug 2025 19:11:06 -0700
Subject: [PATCH 201/224] =?UTF-8?q?[V1]=20reduce=20block=20size=20for=20tr?=
 =?UTF-8?q?ee=20attention=20correctness=20test=20to=20fix=20'ou=E2=80=A6?=
 =?UTF-8?q?=20(#22207)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Giancarlo Delfin <gdelfin@meta.com>
---
 tests/v1/spec_decode/test_tree_attention.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/v1/spec_decode/test_tree_attention.py b/tests/v1/spec_decode/test_tree_attention.py
index 42468daa62a9a..456ce712d36e4 100644
--- a/tests/v1/spec_decode/test_tree_attention.py
+++ b/tests/v1/spec_decode/test_tree_attention.py
@@ -155,7 +155,7 @@ def test_tree_attn_correctness() -> None:
 
     dim_per_head = 128
     num_kv_heads = 2
-    block_size = 128
+    block_size = 32
     max_sequence_length = 8192
     randomize_blocks = True
     for batch_size in [1, 16, 32]:

From f4f4e7ef273645192fac837718b3fdcf073c597a Mon Sep 17 00:00:00 2001
From: lkchen <github@lkchen.net>
Date: Mon, 4 Aug 2025 19:11:33 -0700
Subject: [PATCH 202/224] [V0 deprecation][P/D] Deprecate v0 `KVConnectorBase`
 code (1/2) (#21785)

Signed-off-by: Linkun Chen <github@lkchen.net>
---
 .buildkite/test-pipeline.yaml                 |   1 -
 tests/kv_transfer/test_disagg.py              | 120 -------
 .../kv_transfer/kv_connector/base.py          | 140 +-------
 .../kv_transfer/kv_connector/factory.py       |  68 +---
 .../kv_connector/lmcache_connector.py         |  99 ------
 .../kv_connector/mooncake_store_connector.py  | 203 -----------
 .../kv_connector/simple_connector.py          | 329 ------------------
 .../kv_transfer/kv_connector/utils.py         |   9 +-
 .../kv_connector/v1/multi_connector.py        |   8 +-
 .../kv_transfer/kv_connector_agent.py         |  77 ----
 .../kv_transfer/kv_transfer_state.py          |   9 +-
 vllm/v1/core/sched/scheduler.py               |   2 +-
 .../worker/kv_connector_model_runner_mixin.py |   6 +-
 13 files changed, 31 insertions(+), 1040 deletions(-)
 delete mode 100644 tests/kv_transfer/test_disagg.py
 delete mode 100644 vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py
 delete mode 100644 vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py
 delete mode 100644 vllm/distributed/kv_transfer/kv_connector/simple_connector.py
 delete mode 100644 vllm/distributed/kv_transfer/kv_connector_agent.py

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 88e1197d703a4..b7a2ca6ca9b24 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -749,7 +749,6 @@ steps:
   # this test fails consistently.
   # TODO: investigate and fix
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
-  - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
   - pytest -v -s models/multimodal/generation/test_maverick.py
 
diff --git a/tests/kv_transfer/test_disagg.py b/tests/kv_transfer/test_disagg.py
deleted file mode 100644
index 9f2229cc41dff..0000000000000
--- a/tests/kv_transfer/test_disagg.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import os
-import subprocess
-import sys
-import time
-from subprocess import Popen
-
-import pytest
-import requests
-import torch
-
-
-# Fixture to set up environment variables and teardown servers after tests
-@pytest.fixture(scope="module", autouse=True)
-def setup_servers():
-    if torch.cuda.device_count() < 2:
-        pytest.skip("Skipping test: fewer than 2 GPUs available")
-
-    # Set up environment variables
-    VLLM_HOST_IP = subprocess.check_output("hostname -I | awk '{print $1}'",
-                                           shell=True).decode().strip()
-    os.environ["VLLM_HOST_IP"] = VLLM_HOST_IP
-
-    # Start prefill instance
-    prefill_cmd = [
-        sys.executable,
-        "-m",
-        "vllm.entrypoints.openai.api_server",
-        "--model",
-        "meta-llama/Llama-3.2-1B-Instruct",
-        "--port",
-        "8100",
-        "--gpu-memory-utilization",
-        "0.5",
-        "--max-model-len",
-        "1000",
-        "--kv-transfer-config",
-        '{"kv_connector":"PyNcclConnector","kv_role":"kv_producer",'\
-        '"kv_rank":0,"kv_parallel_size":2}',
-    ]
-    prefill_env = os.environ.copy()
-    prefill_env["CUDA_VISIBLE_DEVICES"] = "0"
-    prefill_proc = Popen(prefill_cmd, env=prefill_env)
-
-    # Start decode instance
-    decode_cmd = [
-        sys.executable,
-        "-m",
-        "vllm.entrypoints.openai.api_server",
-        "--model",
-        "meta-llama/Llama-3.2-1B-Instruct",
-        "--port",
-        "8200",
-        "--gpu-memory-utilization",
-        "0.5",
-        "--max-model-len",
-        "1000",
-        "--kv-transfer-config",
-        '{"kv_connector":"PyNcclConnector","kv_role":"kv_consumer",'\
-        '"kv_rank":1,"kv_parallel_size":2}',
-    ]
-    decode_env = os.environ.copy()
-    decode_env["CUDA_VISIBLE_DEVICES"] = "1"
-    decode_proc = Popen(decode_cmd, env=decode_env)
-
-    # Wait for servers to be ready
-    assert wait_for_server(8100), "Prefill server did not start in time"
-    assert wait_for_server(8200), "Decode server did not start in time"
-
-    # Yield to the test function and handle teardown after tests
-    yield
-
-    # Cleanup: kill the processes
-    prefill_proc.terminate()
-    decode_proc.terminate()
-
-    # Additional cleanup if needed
-    prefill_proc.wait()
-    decode_proc.wait()
-
-
-# Helper function to wait for server
-def wait_for_server(port, timeout=240):
-    start_time = time.time()
-    while time.time() - start_time < timeout:
-        try:
-            response = requests.get(f"http://localhost:{port}/v1/completions")
-            if response.status_code in [200, 405]:
-                return True
-        except requests.ConnectionError:
-            time.sleep(1)
-    return False
-
-
-# Test function to send curl requests and validate responses
-@pytest.mark.parametrize("prompt", ["San Francisco is a", "Santa Clara is a"])
-def test_disaggregated_prefilling(prompt):
-    # Send to prefill
-    response = requests.post("http://localhost:8100/v1/completions",
-                             headers={"Content-Type": "application/json"},
-                             json={
-                                 "model": "meta-llama/Llama-3.2-1B-Instruct",
-                                 "prompt": prompt,
-                                 "max_tokens": 1,
-                                 "temperature": 0
-                             })
-    assert response.status_code == 200
-
-    # Send to decode
-    response = requests.post("http://localhost:8200/v1/completions",
-                             headers={"Content-Type": "application/json"},
-                             json={
-                                 "model": "meta-llama/Llama-3.2-1B-Instruct",
-                                 "prompt": prompt,
-                                 "max_tokens": 10,
-                                 "temperature": 0
-                             })
-    assert response.status_code == 200
diff --git a/vllm/distributed/kv_transfer/kv_connector/base.py b/vllm/distributed/kv_transfer/kv_connector/base.py
index 868b227fc8994..011bbb69abb08 100644
--- a/vllm/distributed/kv_transfer/kv_connector/base.py
+++ b/vllm/distributed/kv_transfer/kv_connector/base.py
@@ -1,142 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-KVConnectorBase Class for Distributed KV Cache & Hidden State communication
-
-The class provides two primary abstract methods:
-1. send_kv_caches_and_hidden_states(): Send KV caches and hidden states
-2. recv_kv_caches_and_hidden_states(): Recv KV caches and hidden states
-"""
-
-from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional, Union
-
-import torch
+"""Defines the base type for KV cache connectors."""
 
 from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorBase_V1
-from vllm.sequence import IntermediateTensors
 
-if TYPE_CHECKING:
-    from vllm.config import VllmConfig
-    from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata
+KVConnectorBase = KVConnectorBase_V1
+KVConnectorBaseType = KVConnectorBase_V1
 
-
-class KVConnectorBase(ABC):
-    """
-    Abstract base class for a KV connector.
-
-    The class provides two primary abstract methods:
-    1. send_kv_caches_and_hidden_states(): Send KV caches and hidden states
-    2. recv_kv_caches_and_hidden_states(): Recv KV caches and hidden states
-    """
-
-    @abstractmethod
-    def __init__(
-        self,
-        rank: int,
-        local_rank: int,
-        config: "VllmConfig",
-    ):
-        raise NotImplementedError
-
-    @abstractmethod
-    def close(self) -> None:
-        """Close the buffer and release resources.
-
-        This method is responsible for cleaning up resources related to the 
-        connector when it is no longer needed.
-
-        Raises:
-            NotImplementedError: This method must be implemented in subclasses.
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def send_kv_caches_and_hidden_states(
-        self,
-        model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor],
-        hidden_or_intermediate_states: Union[torch.Tensor,
-                                             IntermediateTensors],
-    ) -> None:
-        """
-        Send KV caches and hidden states to the connector.
-
-        This method processes the input tokens, KV caches, and 
-        hidden/intermediate states for a given model and sends the data to the 
-        decode instance.
-
-        Args:
-            model_executable (torch.nn.Module): The model executable containing 
-                start and end layer information.
-            model_input (ModelInputForGPUWithSamplingMetadata): The input
-                metadata from vLLM.
-            kv_caches (list[torch.Tensor]): List of KV caches (keys and values) 
-                for each layer.
-            hidden_or_intermediate_states (Union[torch.Tensor, 
-            IntermediateTensors]): 
-                The hidden or intermediate states associated with the tokens.
-
-        Returns:
-            None
-
-        """
-
-        raise NotImplementedError
-
-    @abstractmethod
-    def recv_kv_caches_and_hidden_states(
-        self, model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor]
-    ) -> tuple[Union[torch.Tensor, IntermediateTensors], bool,
-               "ModelInputForGPUWithSamplingMetadata"]:
-        """
-        Receive KV caches and hidden states from the connector.
-
-        This method attempts to retrieve KV caches and hidden states for input
-        tokens. If all required KV caches and hidden states are received, it
-        will bypass model input, else it will fall back to normal vLLM model 
-        forwarding.
-
-        Args:
-            model_executable (torch.nn.Module): 
-                The model executable from vLLM modelrunner.
-            model_input (ModelInputForGPUWithSamplingMetadata): 
-                The model input from vLLM modelrunner.
-            kv_caches (list[torch.Tensor]): 
-                List of KV caches for each layer.
-
-        Returns:
-            - hidden_or_intermediate_states (torch.Tensor or
-            IntermediateTensors): 
-                Concatenated hidden states if all required data is retrieved, 
-                otherwise `None`.
-            - bypass_model_exec (bool): 
-                Indicates whether the model execution can be skipped (True) or 
-                needs to be redone (False).
-            - model_input (ModelInputForGPUWithSamplingMetadata): 
-                Optionally adjusted input metadata for re-execution when 
-                `bypass_model_exec=False`.
-
-        """
-
-        raise NotImplementedError
-
-    @classmethod
-    def get_required_kvcache_layout(
-            cls, vllm_config: "VllmConfig") -> Optional[str]:
-        """
-        Get the required KV cache layout for this connector.
-        Args:
-            vllm_config (VllmConfig): the vllm config.
-
-        Returns:
-            str: the required KV cache layout. e.g. HND, or NHD.
-            None if the connector does not require a specific layout.
-        """
-        return None
-
-
-KVConnectorBaseType = Union[KVConnectorBase, KVConnectorBase_V1]
+__all__ = ["KVConnectorBase", "KVConnectorBaseType"]
diff --git a/vllm/distributed/kv_transfer/kv_connector/factory.py b/vllm/distributed/kv_transfer/kv_connector/factory.py
index cf7cde2c43771..01673a0d7c876 100644
--- a/vllm/distributed/kv_transfer/kv_connector/factory.py
+++ b/vllm/distributed/kv_transfer/kv_connector/factory.py
@@ -5,14 +5,10 @@ import importlib
 from typing import TYPE_CHECKING, Callable
 
 import vllm.envs as envs
-from vllm.config import KVTransferConfig
-from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBaseType
-from vllm.distributed.kv_transfer.kv_connector.v1 import (KVConnectorBase_V1,
-                                                          KVConnectorRole)
+from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
+from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorRole
 from vllm.logger import init_logger
 
-from .base import KVConnectorBase
-
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
 
@@ -20,7 +16,7 @@ logger = init_logger(__name__)
 
 
 class KVConnectorFactory:
-    _registry: dict[str, Callable[[], type[KVConnectorBaseType]]] = {}
+    _registry: dict[str, Callable[[], type[KVConnectorBase]]] = {}
 
     @classmethod
     def register_connector(cls, name: str, module_path: str,
@@ -29,28 +25,23 @@ class KVConnectorFactory:
         if name in cls._registry:
             raise ValueError(f"Connector '{name}' is already registered.")
 
-        def loader() -> type[KVConnectorBaseType]:
+        def loader() -> type[KVConnectorBase]:
             module = importlib.import_module(module_path)
             return getattr(module, class_name)
 
         cls._registry[name] = loader
 
     @classmethod
-    def create_connector_v0(cls, rank: int, local_rank: int,
-                            config: "VllmConfig") -> KVConnectorBase:
-        if envs.VLLM_USE_V1:
-            raise ValueError("Attempting to initialize a V0 Connector, "
+    def create_connector(
+        cls,
+        config: "VllmConfig",
+        role: KVConnectorRole,
+    ) -> KVConnectorBase:
+        if not envs.VLLM_USE_V1:
+            raise ValueError("Attempting to initialize a V1 Connector, "
                              f"but found {envs.VLLM_USE_V1=}")
 
-        connector_cls = cls.get_connector_class(config.kv_transfer_config)
-        assert issubclass(connector_cls, KVConnectorBase)
-        return connector_cls(rank, local_rank, config)
-
-    @classmethod
-    def get_connector_class(
-            cls, kv_transfer_config: "KVTransferConfig"
-    ) -> type[KVConnectorBaseType]:
-        """Get the connector class by name."""
+        kv_transfer_config = config.kv_transfer_config
         connector_name = kv_transfer_config.kv_connector
         if connector_name in cls._registry:
             connector_cls = cls._registry[connector_name]()
@@ -61,21 +52,7 @@ class KVConnectorFactory:
                     f"Unsupported connector type: {connector_name}")
             connector_module = importlib.import_module(connector_module_path)
             connector_cls = getattr(connector_module, connector_name)
-        return connector_cls
-
-    @classmethod
-    def create_connector_v1(
-        cls,
-        config: "VllmConfig",
-        role: KVConnectorRole,
-    ) -> KVConnectorBase_V1:
-        if not envs.VLLM_USE_V1:
-            raise ValueError("Attempting to initialize a V1 Connector, "
-                             f"but found {envs.VLLM_USE_V1=}")
-
-        kv_transfer_config = config.kv_transfer_config
-        connector_cls = cls.get_connector_class(kv_transfer_config)
-        assert issubclass(connector_cls, KVConnectorBase_V1)
+        assert issubclass(connector_cls, KVConnectorBase)
         logger.info("Creating v1 connector with name: %s and engine_id: %s",
                     connector_cls.__name__, kv_transfer_config.engine_id)
         # NOTE(Kuntai): v1 connector is explicitly separated into two roles.
@@ -92,25 +69,6 @@ class KVConnectorFactory:
 # Register various connectors here.
 # The registration should not be done in each individual file, as we want to
 # only load the files corresponding to the current connector.
-KVConnectorFactory.register_connector(
-    "PyNcclConnector",
-    "vllm.distributed.kv_transfer.kv_connector.simple_connector",
-    "SimpleConnector")
-
-KVConnectorFactory.register_connector(
-    "MooncakeConnector",
-    "vllm.distributed.kv_transfer.kv_connector.simple_connector",
-    "SimpleConnector")
-
-KVConnectorFactory.register_connector(
-    "LMCacheConnector",
-    "vllm.distributed.kv_transfer.kv_connector.lmcache_connector",
-    "LMCacheConnector")
-
-KVConnectorFactory.register_connector(
-    "MooncakeStoreConnector",
-    "vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector",
-    "MooncakeStoreConnector")
 
 KVConnectorFactory.register_connector(
     "SharedStorageConnector",
diff --git a/vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py b/vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py
deleted file mode 100644
index 78bf3095613a7..0000000000000
--- a/vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-LMCache KV Cache Connector for Distributed Machine Learning Inference
-
-The LMCacheConnector can (1) transfer KV caches between prefill vLLM worker
-(KV cache producer) and decode vLLM worker (KV cache consumer) using LMCache;
-(2) offload and share KV caches.
-"""
-
-from typing import TYPE_CHECKING, Union
-
-import torch
-
-from vllm.config import VllmConfig
-from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
-from vllm.logger import init_logger
-from vllm.sequence import IntermediateTensors
-
-if TYPE_CHECKING:
-    from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata
-
-logger = init_logger(__name__)
-
-
-class LMCacheConnector(KVConnectorBase):
-
-    def __init__(
-        self,
-        rank: int,
-        local_rank: int,
-        config: VllmConfig,
-    ):
-
-        self.transfer_config = config.kv_transfer_config
-        self.vllm_config = config
-
-        from lmcache.experimental.cache_engine import LMCacheEngineBuilder
-        from lmcache.integration.vllm.utils import ENGINE_NAME
-        from lmcache.integration.vllm.vllm_adapter import (
-            RetrieveStatus, StoreStatus, init_lmcache_engine,
-            lmcache_retrieve_kv, lmcache_should_retrieve, lmcache_should_store,
-            lmcache_store_kv)
-        logger.info("Initializing LMCacheConfig under kv_transfer_config %s",
-                    self.transfer_config)
-
-        # TODO (Jiayi): Find model_config, parallel_config, and cache_config
-        self.engine = init_lmcache_engine(config.model_config,
-                                          config.parallel_config,
-                                          config.cache_config)
-        self.lmcache_engine_name = ENGINE_NAME
-        self.lmcache_engine_builder = LMCacheEngineBuilder
-
-        self.model_config = config.model_config
-        self.parallel_config = config.parallel_config
-        self.cache_config = config.cache_config
-        self.lmcache_retrieve_kv = lmcache_retrieve_kv
-        self.lmcache_store_kv = lmcache_store_kv
-        self.lmcache_should_retrieve = lmcache_should_retrieve
-        self.lmcache_should_store = lmcache_should_store
-        self.store_status = StoreStatus
-        self.retrieve_status = RetrieveStatus
-
-    def recv_kv_caches_and_hidden_states(
-        self, model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor]
-    ) -> tuple[Union[torch.Tensor, IntermediateTensors], bool,
-               "ModelInputForGPUWithSamplingMetadata"]:
-
-        retrieve_status = self.lmcache_should_retrieve(model_input)
-        model_input, bypass_model_exec, hidden_or_intermediate_states =\
-            self.lmcache_retrieve_kv(
-                model_executable, model_input, self.cache_config, kv_caches,
-                retrieve_status)
-        return hidden_or_intermediate_states, bypass_model_exec, model_input
-
-    def send_kv_caches_and_hidden_states(
-        self,
-        model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor],
-        hidden_or_intermediate_states: Union[torch.Tensor,
-                                             IntermediateTensors],
-    ) -> None:
-
-        store_status = self.lmcache_should_store(model_input)
-        self.lmcache_store_kv(
-            self.model_config,
-            self.parallel_config,
-            self.cache_config,
-            model_executable,
-            model_input,
-            kv_caches,
-            store_status,
-        )
-
-    def close(self):
-        self.lmcache_engine_builder.destroy(self.lmcache_engine_name)
diff --git a/vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py b/vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py
deleted file mode 100644
index 94a7ce91acf17..0000000000000
--- a/vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-MooncakeStore Connector for Distributed Machine Learning Inference
-The MooncakeStoreConnector transfers KV caches between prefill vLLM workers
-(KV cache producer) and decode vLLM workers (KV cache consumer) using a
-database-style KVStore.
-"""
-import hashlib
-from typing import TYPE_CHECKING, Union
-
-import torch
-
-from vllm.config import VllmConfig
-from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
-from vllm.distributed.kv_transfer.kv_connector.utils import (
-    model_aware_kv_ops_helper as kv_helper)
-from vllm.logger import init_logger
-from vllm.sequence import IntermediateTensors
-
-if TYPE_CHECKING:
-    from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata
-
-logger = init_logger(__name__)
-
-
-class MooncakeStoreConnector(KVConnectorBase):
-
-    def __init__(
-        self,
-        rank: int,
-        local_rank: int,
-        config: VllmConfig,
-    ):
-        self.kv_transfer_config = config.kv_transfer_config
-        self.kv_helper = kv_helper(config)
-        self.local_tp_rank = local_rank
-
-        # Init kv_store
-        if self.kv_transfer_config.kv_connector == "MooncakeStoreConnector":
-            # Check if MOONCAKE_CONFIG_PATH is set
-            import os
-            use_mooncake_store = os.getenv('MOONCAKE_CONFIG_PATH') is not None
-
-            if not use_mooncake_store:
-                raise ValueError(
-                    "To use MooncakeStoreConnector, you need to pass the ENV: "
-                    "'MOONCAKE_CONFIG_PATH=/path/to/mooncake_config.json'.")
-            else:
-                from vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store import (  # noqa: E501
-                    MooncakeStore)
-                logger.info(
-                    "Initializing KVStoreConnector under kv_transfer_config %s",
-                    self.kv_transfer_config)
-                self.kv_store = MooncakeStore(config)
-        else:
-            logger.error("Can not find %s",
-                         self.kv_transfer_config.kv_connector)
-
-        assert self.kv_store is not None
-
-    def close(self) -> None:
-        """Close the buffer and release resources.
-        This method is responsible for cleaning up resources related to the 
-        connector when it is no longer needed.
-        Raises:
-            NotImplementedError: This method must be implemented in subclasses.
-        """
-        self.kv_store.close()
-
-    def send_kv_caches_and_hidden_states(
-        self,
-        model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor],
-        hidden_or_intermediate_states: Union[torch.Tensor,
-                                             IntermediateTensors],
-    ) -> None:
-        input_tokens_tensor = model_input.input_tokens
-        seq_lens = model_input.attn_metadata.seq_lens
-        slot_mapping_flat = model_input.attn_metadata.slot_mapping.flatten()
-        start_layer = model_executable.model.start_layer
-        end_layer = model_executable.model.end_layer
-        num_heads, head_size = self.kv_helper.get_model_args(model_executable)
-
-        for idx, slen in enumerate(seq_lens):
-            start_pos = sum(seq_lens[:idx])
-            end_pos = start_pos + slen
-
-            current_tokens = input_tokens_tensor[start_pos:end_pos]
-            store_key_prefix = self.tensor_hash(current_tokens)
-            keys, values = [], []
-
-            for layer_id in range(start_layer, end_layer):
-                kv_cache = kv_caches[layer_id - start_layer]
-                key_cache, value_cache = self.kv_helper.get_kv_from_cache(
-                    kv_cache, num_heads, head_size)
-                current_slot_mapping = slot_mapping_flat[start_pos:end_pos]
-
-                keys.append(key_cache[current_slot_mapping].unsqueeze(0))
-                values.append(value_cache[current_slot_mapping].unsqueeze(0))
-
-            keys = torch.cat(keys, dim=0)
-            values = torch.cat(values, dim=0)
-            kvcache_to_sent = torch.stack((keys, values), dim=0)
-            store_kvcache_key = f"{store_key_prefix}_{self.local_tp_rank}"
-            self.kv_store.put(store_kvcache_key, kvcache_to_sent)
-
-            hidden_key = f"{store_key_prefix}_hidden_{self.local_tp_rank}"
-            self.kv_store.put(hidden_key,
-                              hidden_or_intermediate_states[start_pos:end_pos])
-
-        logger.debug("[rank%d]: KV send DONE.", torch.distributed.get_rank())
-
-    def recv_kv_caches_and_hidden_states(
-        self, model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor]
-    ) -> tuple[Union[torch.Tensor, IntermediateTensors], bool,
-               "ModelInputForGPUWithSamplingMetadata"]:
-        bypass_model_exec = True
-        input_tokens_tensor = model_input.input_tokens
-        seq_lens = model_input.attn_metadata.seq_lens
-        num_prefill_tokens = model_input.attn_metadata.num_prefill_tokens
-        slot_mapping = model_input.attn_metadata.slot_mapping.flatten()
-        start_layer = model_executable.model.start_layer
-        end_layer = model_executable.model.end_layer
-        hidden_or_intermediate_states_for_one_req = []
-
-        for idx, slen in enumerate(seq_lens):
-            start_pos = sum(seq_lens[:idx])
-            end_pos = start_pos + slen
-
-            if start_pos >= num_prefill_tokens:
-                # This can happen during inflight batching. See:
-                # vllm/worker/model_runner.py::_prepare_model_input_tensors:
-                # - input_tokens[:num_prefill_tokens] contains prefill tokens.
-                # - input_tokens[num_prefill_tokens:] contains decode tokens.
-                logger.warning("You should set --enable_chunked_prefill=False "
-                               "and --max_num_batched_tokens "
-                               "should be equal to max_seq_len_to_capture")
-                bypass_model_exec = False
-                assert start_pos == num_prefill_tokens
-                break
-
-            current_tokens = input_tokens_tensor[start_pos:end_pos]
-
-            # get roi for current seq
-            load_key_prefix = self.tensor_hash(current_tokens)
-            load_kvcache_key = f"{load_key_prefix}_{self.local_tp_rank}"
-            remote_kv = self.kv_store.get(load_kvcache_key)
-            hidden_key = f"{load_key_prefix}_hidden_{self.local_tp_rank}"
-            hidden = self.kv_store.get(hidden_key)
-
-            if remote_kv is None or hidden is None:
-                # didn't find any match.
-                bypass_model_exec = False
-                continue
-
-            num_computed_tokens = current_tokens.shape[0]
-
-            # update the end position based on how many tokens are cached.
-            end_pos = start_pos + num_computed_tokens
-
-            # call self.kv_store to get kv layer by layer
-            for layer_id in range(start_layer, end_layer):
-                layer = model_executable.model.layers[layer_id]
-                # get kvcache object
-                kv_cache = kv_caches[layer_id - start_layer]
-
-                # get remote kvcache
-                remote_k, remote_v = remote_kv[0][layer_id], remote_kv[1][
-                    layer_id]
-
-                self.kv_helper.put_kv_to_cache(model_executable, remote_k,
-                                               remote_v, layer, kv_cache,
-                                               slot_mapping, start_pos,
-                                               end_pos)
-
-            hidden_or_intermediate_states_for_one_req.append(hidden)
-
-        if not bypass_model_exec:
-            logger.warning(
-                "[rank%d]: Failed to receive all KVs and hidden "
-                "states, redo model forwarding.", torch.distributed.get_rank())
-            hidden_or_intermediate_states = None
-
-        else:
-            logger.debug(
-                "[rank%d]: Successfully received all KVs and hidden "
-                "states, skip model forwarding.", torch.distributed.get_rank())
-            hidden_or_intermediate_states = torch.cat(
-                hidden_or_intermediate_states_for_one_req, dim=0)
-
-        return hidden_or_intermediate_states, bypass_model_exec, model_input
-
-    @staticmethod
-    def tensor_hash(tensor: torch.Tensor) -> int:
-        """Calculate the hash value of the tensor."""
-        tensor_bytes = tensor.clone().detach().cpu().numpy().tobytes()
-        hash_object = hashlib.blake2b(tensor_bytes)
-        hash_hex = hash_object.hexdigest()
-        return int(hash_hex[:16], 16)
diff --git a/vllm/distributed/kv_transfer/kv_connector/simple_connector.py b/vllm/distributed/kv_transfer/kv_connector/simple_connector.py
deleted file mode 100644
index e7c079e1f115c..0000000000000
--- a/vllm/distributed/kv_transfer/kv_connector/simple_connector.py
+++ /dev/null
@@ -1,329 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-Simple KV Cache Connector for Distributed Machine Learning Inference
-
-The SimpleConnector transfers KV caches between prefill vLLM worker (KV cache
-producer) and decode vLLM worker (KV cache consumer) using PyNcclPipe or
-MooncakePipe.
-
-But the logic can be extended to support other pipe and lookup buffer.
-"""
-from typing import TYPE_CHECKING, Optional, Union
-
-import torch
-
-from vllm.config import VllmConfig
-from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
-from vllm.distributed.kv_transfer.kv_connector.utils import (
-    model_aware_kv_ops_helper as kv_helper)
-from vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer import (
-    SimpleBuffer)
-from vllm.logger import init_logger
-from vllm.sequence import IntermediateTensors
-
-if TYPE_CHECKING:
-    from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata
-
-logger = init_logger(__name__)
-
-
-class SimpleConnector(KVConnectorBase):
-
-    def __init__(
-        self,
-        rank: int,
-        local_rank: int,
-        config: VllmConfig,
-    ):
-
-        self.config = config.kv_transfer_config
-        self.kv_helper = kv_helper(config)
-
-        if self.config.kv_connector == "PyNcclConnector":
-            from vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe import (
-                PyNcclPipe)
-            logger.info(
-                "Initializing PyNcclConfig under kv_transfer_config %s",
-                self.config)
-        elif self.config.kv_connector == "MooncakeConnector":
-            # Check if MOONCAKE_CONFIG_PATH is set
-            import os
-            use_mooncake_distributed_pipe = os.getenv(
-                'MOONCAKE_CONFIG_PATH') is not None
-
-            if not use_mooncake_distributed_pipe:
-                raise ValueError(
-                    "To use MooncakeConnector, you need to pass the ENV: "
-                    "'MOONCAKE_CONFIG_PATH=/path/to/mooncake_config.json'.")
-            else:
-                from vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe import (  # noqa: E501
-                    MooncakePipe)
-                logger.info(
-                    "Initializing MooncakeConfig under kv_transfer_config %s",
-                    self.config)
-
-        self.lookup_buffer_size = self.config.kv_buffer_size
-
-        self.producer_buffer: Optional[SimpleBuffer] = None
-        self.consumer_buffer: Optional[SimpleBuffer] = None
-
-        self.producer_data_pipe: Union[PyNcclPipe, MooncakePipe]
-        self.consumer_data_pipe: Union[PyNcclPipe, MooncakePipe]
-        self.producer_signal_pipe: Union[PyNcclPipe, MooncakePipe]
-        self.consumer_signal_pipe: Union[PyNcclPipe, MooncakePipe]
-
-        # 2 pipes for every rank in the world
-        port_offset_base = 2 * rank
-
-        # In disaggregated prefill, the prefill vLLM only uses send pipe
-        # and the decode vLLM only uses recv pipe
-        if self.config.is_kv_producer:
-
-            if self.config.kv_connector == "PyNcclConnector":
-                self.producer_data_pipe = PyNcclPipe(
-                    local_rank=local_rank,
-                    config=self.config,
-                    port_offset=port_offset_base,
-                )
-                self.producer_signal_pipe = PyNcclPipe(
-                    local_rank=local_rank,
-                    config=self.config,
-                    port_offset=port_offset_base + 1,
-                    device="cpu",
-                )
-            elif self.config.kv_connector == "MooncakeConnector":
-                self.producer_data_pipe = MooncakePipe(
-                    local_rank=local_rank,
-                    config=self.config,
-                )
-                # We only need to initialize MooncakePipe once
-                self.producer_signal_pipe = self.producer_data_pipe
-
-            self.producer_buffer = SimpleBuffer(self.producer_signal_pipe,
-                                                self.producer_data_pipe,
-                                                self.config.kv_buffer_size)
-
-        else:
-
-            # the current vLLM instance is KV consumer, so it needs to connect
-            # its recv pipe to the send pipe of KV producer
-            if self.config.kv_connector == "PyNcclConnector":
-                self.consumer_data_pipe = PyNcclPipe(
-                    local_rank=local_rank,
-                    config=self.config,
-                    port_offset=port_offset_base,
-                )
-                self.consumer_signal_pipe = PyNcclPipe(
-                    local_rank=local_rank,
-                    config=self.config,
-                    port_offset=port_offset_base + 1,
-                    device="cpu",
-                )
-            elif self.config.kv_connector == "MooncakeConnector":
-                self.consumer_data_pipe = MooncakePipe(
-                    local_rank=local_rank,
-                    config=self.config,
-                )
-                self.consumer_signal_pipe = self.consumer_data_pipe
-
-            self.consumer_buffer = SimpleBuffer(
-                self.consumer_signal_pipe,
-                self.consumer_data_pipe,
-                self.config.kv_buffer_size,
-            )
-
-    def select(self, input_tokens: Optional[torch.Tensor],
-               roi: Optional[torch.Tensor]) -> list[Optional[torch.Tensor]]:
-
-        assert self.consumer_buffer is not None, "Please initialize the "\
-            "consumer buffer before calling select."
-        return self.consumer_buffer.drop_select(input_tokens, roi)
-
-    def insert(self, input_tokens: torch.Tensor, roi: torch.Tensor,
-               key: torch.Tensor, value: torch.Tensor,
-               hidden: torch.Tensor) -> None:
-
-        assert self.producer_buffer is not None, "Please initialize the "\
-            "producer buffer before calling insert."
-
-        self.producer_buffer.insert(input_tokens, roi, key, value, hidden)
-
-    def send_kv_caches_and_hidden_states(
-        self,
-        model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor],
-        hidden_or_intermediate_states: Union[torch.Tensor,
-                                             IntermediateTensors],
-    ) -> None:
-
-        input_tokens_tensor = model_input.input_tokens
-        seq_lens = model_input.attn_metadata.seq_lens
-        slot_mapping_flat = model_input.attn_metadata.slot_mapping.flatten()
-        num_prefill_tokens = model_input.attn_metadata.num_prefill_tokens
-        start_layer = model_executable.model.start_layer
-        end_layer = model_executable.model.end_layer
-        num_heads, head_size = self.kv_helper.get_model_args(model_executable)
-
-        # query_lens contains new KV caches that are added to vLLM.
-        # so we will send them to decode instance
-        # FIXME(Kuntai): This assume that all requests are prefill.
-        for idx, slen in enumerate(seq_lens):
-            start_pos = sum(seq_lens[:idx])
-            end_pos = start_pos + slen
-
-            if start_pos >= num_prefill_tokens:
-                # vllm/worker/model_runner.py::_prepare_model_input_tensors:
-                # - input_tokens[:num_prefill_tokens] contains prefill tokens.
-                # - input_tokens[num_prefill_tokens:] contains decode tokens.
-                logger.warning("You have some decode requests while using "
-                               "SimpleConnector. Their KVCache won't be sent.")
-                break
-
-            current_tokens = input_tokens_tensor[start_pos:end_pos]
-
-            keys, values = [], []
-
-            for layer_id in range(start_layer, end_layer):
-                kv_cache = kv_caches[layer_id - start_layer]
-                key_cache, value_cache = self.kv_helper.get_kv_from_cache(
-                    kv_cache, num_heads, head_size)
-
-                current_slot_mapping = slot_mapping_flat[start_pos:end_pos]
-
-                keys.append(key_cache[current_slot_mapping].unsqueeze(0))
-                values.append(value_cache[current_slot_mapping].unsqueeze(0))
-
-            keys = torch.cat(keys, dim=0)
-            values = torch.cat(values, dim=0)
-
-            self.insert(current_tokens,
-                        torch.ones_like(current_tokens,
-                                        dtype=bool), keys, values,
-                        hidden_or_intermediate_states[start_pos:end_pos])
-
-        logger.debug("[rank%d]: KV send DONE.", torch.distributed.get_rank())
-
-    def recv_kv_caches_and_hidden_states(
-        self, model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor]
-    ) -> tuple[Union[torch.Tensor, IntermediateTensors], bool,
-               "ModelInputForGPUWithSamplingMetadata"]:
-
-        # When bypass_model_exec is set to False, it means that at least for one
-        # request its corresponding KV cache or hidden state is missing.
-        # In this case we need to do prefilling to recompute missing KV cache
-        # and hidden states.
-        bypass_model_exec = True
-
-        input_tokens_tensor = model_input.input_tokens
-        seq_lens = model_input.attn_metadata.seq_lens
-        num_prefill_tokens = model_input.attn_metadata.num_prefill_tokens
-        slot_mapping = model_input.attn_metadata.slot_mapping.flatten()
-        start_layer = model_executable.model.start_layer
-        end_layer = model_executable.model.end_layer
-
-        hidden_or_intermediate_states_for_one_req = []
-
-        input_tokens_list = []
-        num_computed_tokens_list = []
-        start_pos_list = []
-
-        # enumerate different requests
-        # FIXME(Kuntai): This impl assumes that all requests are prefill.
-        for idx, slen in enumerate(seq_lens):
-            start_pos = sum(seq_lens[:idx])
-            end_pos = start_pos + slen
-
-            if start_pos >= num_prefill_tokens:
-                # This can happen during inflight batching. See:
-                # vllm/worker/model_runner.py::_prepare_model_input_tensors:
-                # - input_tokens[:num_prefill_tokens] contains prefill tokens.
-                # - input_tokens[num_prefill_tokens:] contains decode tokens.
-                logger.warning("You should set --enable_chunked_prefill=False "
-                               "and --max_num_batched_tokens "
-                               "should be equal to --max_seq_len_to_capture")
-                bypass_model_exec = False
-                assert start_pos == num_prefill_tokens
-                break
-
-            current_tokens = input_tokens_tensor[start_pos:end_pos]
-            num_tokens = slen
-
-            # collecting data for rebuilding the input
-            input_tokens_list.append(current_tokens)
-            start_pos_list.append(start_pos)
-
-            ret = self.select(current_tokens,
-                              torch.ones_like(current_tokens, dtype=bool))
-            if ret[0] is None:
-                # didn't find any match.
-                bypass_model_exec = False
-                num_computed_tokens_list.append(0)
-                continue
-
-            roi: torch.Tensor = ret[1]
-            keys: torch.Tensor = ret[2]
-            values: torch.Tensor = ret[3]
-            hidden: torch.Tensor = ret[4]
-
-            num_computed_tokens = roi.shape[0]
-            num_computed_tokens_list.append(num_computed_tokens)
-
-            # check if both KV cache and the hidden states are received
-            # If not, need to redo the forwarding to compute missing states
-            if not all([(num_computed_tokens == num_tokens), hidden is not None
-                        ]):
-                bypass_model_exec = False
-
-            # update the end position based on how many tokens are cached.
-            end_pos = start_pos + num_computed_tokens
-
-            # put received KV caches into paged memory
-            for cur_layer in range(start_layer, end_layer):
-
-                layer_id = cur_layer - start_layer
-                kv_cache = kv_caches[layer_id]
-                layer = model_executable.model.layers[cur_layer]
-
-                # get remote kvcache
-                remote_k, remote_v = keys[layer_id], values[layer_id]
-
-                self.kv_helper.put_kv_to_cache(model_executable, remote_k,
-                                               remote_v, layer, kv_cache,
-                                               slot_mapping, start_pos,
-                                               end_pos)
-
-            hidden_or_intermediate_states_for_one_req.append(hidden)
-
-        if not bypass_model_exec:
-            # Some of the KV cache is not retrieved
-            # Here we will fall back to normal model forwarding
-            # But optionally you can adjust model_input so that you only do
-            # prefilling on those tokens that are missing KV caches.
-            logger.warning(
-                "[rank%d]: Failed to receive all KVs and hidden "
-                "states, redo model forwarding.", torch.distributed.get_rank())
-            hidden_or_intermediate_states = None
-
-        else:
-            logger.debug(
-                "[rank%d]: Successfully received all KVs and hidden "
-                "states, skip model forwarding.", torch.distributed.get_rank())
-            hidden_or_intermediate_states = torch.cat(
-                hidden_or_intermediate_states_for_one_req, dim=0)
-
-        return hidden_or_intermediate_states, bypass_model_exec, model_input
-
-    def close(self):
-        self.producer_data_pipe.close()
-        self.consumer_data_pipe.close()
-        if self.config.kv_connector == "PyNcclConnector":
-            self.producer_signal_pipe.close()
-            self.consumer_signal_pipe.close()
-        elif self.config.kv_connector == "MooncakeConnector":
-            # MooncakePipe reuses data_pipe for signal_pipe, so we only have to
-            # close the data_pipe.
-            pass
diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
index 1a11cb6d0189a..1da41790f9fb1 100644
--- a/vllm/distributed/kv_transfer/kv_connector/utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -13,8 +13,8 @@ import torch
 import vllm.envs as envs
 from vllm import _custom_ops as ops
 from vllm.config import VllmConfig, get_current_vllm_config
-from vllm.distributed.kv_transfer.kv_connector.factory import (
-    KVConnectorFactory)
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorBase_V1)
 from vllm.logger import init_logger
 from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
 
@@ -106,9 +106,8 @@ def get_kv_connector_cache_layout():
     vllm_config = get_current_vllm_config()
     kv_config = vllm_config.kv_transfer_config
     if kv_config is not None:
-        connector_cls = KVConnectorFactory.get_connector_class(kv_config)
-        required_kvcache_layout = connector_cls.get_required_kvcache_layout(
-            vllm_config)
+        required_kvcache_layout = (
+            KVConnectorBase_V1.get_required_kvcache_layout(vllm_config))
         if required_kvcache_layout is not None:
             return required_kvcache_layout
         logger.info_once("Connectors do not specify a " \
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
index 934a03a12ee5e..62a4980bff975 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
@@ -52,7 +52,7 @@ class MultiConnector(KVConnectorBase_V1):
             temp_config.kv_transfer_config = KVTransferConfig(
                 **ktc, engine_id=engine_id)
             self._connectors.append(
-                KVConnectorFactory.create_connector_v1(temp_config, role))
+                KVConnectorFactory.create_connector(temp_config, role))
 
         # A mapping from request id to the index of the connector chosen to
         # load the request from (if any).
@@ -223,9 +223,9 @@ class MultiConnector(KVConnectorBase_V1):
         for ktc in ktcs:
             kv_transfer_config = KVTransferConfig(**ktc)
             temp_vllm_config.kv_transfer_config = kv_transfer_config
-            required_kvcache_layout = KVConnectorFactory.get_connector_class(
-                kv_transfer_config).get_required_kvcache_layout(
-                    temp_vllm_config)
+            required_kvcache_layout = (
+                KVConnectorBase_V1.get_required_kvcache_layout(
+                    temp_vllm_config))
             if required_kvcache_layout is not None:
                 layouts.add(required_kvcache_layout)
 
diff --git a/vllm/distributed/kv_transfer/kv_connector_agent.py b/vllm/distributed/kv_transfer/kv_connector_agent.py
deleted file mode 100644
index 8633fdaf59f8b..0000000000000
--- a/vllm/distributed/kv_transfer/kv_connector_agent.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""A centralized entrypoint to perform distributed KV cache transfer.
-
-This implementation is a shim wrapper on two APIs exposed by `kv_connector`:
-1. `send_kv_caches_and_hidden_states`
-2. `recv_kv_caches_and_hidden_states
-"""
-from typing import TYPE_CHECKING, Union
-
-if TYPE_CHECKING:
-    from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata
-    from vllm.config import VllmConfig
-
-import torch
-
-from vllm.distributed.kv_transfer.kv_connector.factory import (
-    KVConnectorFactory)
-from vllm.logger import init_logger
-from vllm.sequence import IntermediateTensors
-
-logger = init_logger(__name__)
-
-
-class KVTransferAgent:
-    """
-    A class designated for distributed KV transfer
-    
-    Target use cases:
-        1. Disaggregated prefill
-        2. Remote KV cache storage
-    """
-
-    def __init__(
-        self,
-        rank: int,
-        local_rank: int,
-        config: "VllmConfig",
-    ):
-
-        self.config = config
-
-        if config.kv_transfer_config is None:
-            raise ValueError("KVTransferConfig is not set in the VllmConfig,"
-                             " cannot initialize KVConnector.")
-
-        assert self.config.kv_transfer_config.is_kv_transfer_instance, "KV"\
-            "TransferAgent should only be used when kv_connector is set."
-
-        self.connector = KVConnectorFactory.create_connector_v0(
-            rank, local_rank, config)
-
-    def send_kv_caches_and_hidden_states(
-        self,
-        model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor],
-        hidden_or_intermediate_states: Union[torch.Tensor,
-                                             IntermediateTensors],
-    ) -> None:
-
-        self.connector.send_kv_caches_and_hidden_states(
-            model_executable, model_input, kv_caches,
-            hidden_or_intermediate_states)
-
-    def close(self) -> None:
-        self.connector.close()
-
-    def recv_kv_caches_and_hidden_states(
-        self, model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor]
-    ) -> tuple[Union[torch.Tensor, IntermediateTensors], bool,
-               "ModelInputForGPUWithSamplingMetadata"]:
-
-        return self.connector.recv_kv_caches_and_hidden_states(
-            model_executable, model_input, kv_caches)
diff --git a/vllm/distributed/kv_transfer/kv_transfer_state.py b/vllm/distributed/kv_transfer/kv_transfer_state.py
index 60f1d5d8bca75..5e0f64fca220c 100644
--- a/vllm/distributed/kv_transfer/kv_transfer_state.py
+++ b/vllm/distributed/kv_transfer/kv_transfer_state.py
@@ -8,7 +8,6 @@ from vllm.distributed.kv_transfer.kv_connector.factory import (
     KVConnectorFactory)
 from vllm.distributed.kv_transfer.kv_connector.v1 import (KVConnectorBase_V1,
                                                           KVConnectorRole)
-from vllm.distributed.parallel_state import get_world_group
 
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
@@ -61,11 +60,7 @@ def ensure_kv_transfer_initialized(vllm_config: "VllmConfig") -> None:
     if (vllm_config.kv_transfer_config.is_kv_transfer_instance
             and _KV_CONNECTOR_AGENT is None):
         if envs.VLLM_USE_V1:
-            _KV_CONNECTOR_AGENT = KVConnectorFactory.create_connector_v1(
+            _KV_CONNECTOR_AGENT = KVConnectorFactory.create_connector(
                 config=vllm_config, role=KVConnectorRole.WORKER)
         else:
-            _KV_CONNECTOR_AGENT = KVConnectorFactory.create_connector_v0(
-                rank=get_world_group().rank,
-                local_rank=get_world_group().local_rank,
-                config=vllm_config,
-            )
+            raise ValueError("V0 is no longer supported")
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index 49a744cfec69a..d39aea1f2d116 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -83,7 +83,7 @@ class Scheduler(SchedulerInterface):
             assert len(self.kv_cache_config.kv_cache_groups) == 1, (
                 "Multiple KV cache groups are not currently supported "
                 "with KV connectors")
-            self.connector = KVConnectorFactory.create_connector_v1(
+            self.connector = KVConnectorFactory.create_connector(
                 config=self.vllm_config, role=KVConnectorRole.SCHEDULER)
 
         self.kv_event_publisher = EventPublisherFactory.create(
diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py
index 343befe176797..a03ebe35d8e0a 100644
--- a/vllm/v1/worker/kv_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py
@@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Optional
 from vllm.config import VllmConfig
 from vllm.distributed.kv_transfer import (get_kv_transfer_group,
                                           has_kv_transfer_group)
-from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorBase_V1
+from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
 from vllm.forward_context import get_forward_context, set_forward_context
 from vllm.logger import init_logger
 from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput,
@@ -31,7 +31,7 @@ class KVConnectorModelRunnerMixin:
         # Update KVConnector with the KVConnector metadata forward().
         if has_kv_transfer_group():
             kv_connector = get_kv_transfer_group()
-            assert isinstance(kv_connector, KVConnectorBase_V1)
+            assert isinstance(kv_connector, KVConnectorBase)
             assert scheduler_output.kv_connector_metadata is not None
             kv_connector.bind_connector_metadata(
                 scheduler_output.kv_connector_metadata)
@@ -93,7 +93,7 @@ class KVConnectorModelRunnerMixin:
 
         # Update KVConnector with the KVConnector metadata forward().
         kv_connector = get_kv_transfer_group()
-        assert isinstance(kv_connector, KVConnectorBase_V1)
+        assert isinstance(kv_connector, KVConnectorBase)
         assert scheduler_output.kv_connector_metadata is not None
         kv_connector.bind_connector_metadata(
             scheduler_output.kv_connector_metadata)

From 6ad6b8e115b8b46ad918284d862bdadded3af447 Mon Sep 17 00:00:00 2001
From: TJian <tunjian.tan@embeddedllm.com>
Date: Mon, 4 Aug 2025 19:12:16 -0700
Subject: [PATCH 203/224] [FEAT] Refactor ROPE into module (#22192)

Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com>
---
 .../model_executor/layers/rotary_embedding.py | 1967 -----------------
 .../layers/rotary_embedding/__init__.py       |  190 ++
 .../layers/rotary_embedding/base.py           |  237 ++
 .../layers/rotary_embedding/common.py         |  105 +
 .../rotary_embedding/deepseek_scaling_rope.py |  131 ++
 .../rotary_embedding/dual_chunk_rope.py       |  188 ++
 .../dynamic_ntk_alpha_rope.py                 |   41 +
 .../dynamic_ntk_scaling_rope.py               |   67 +
 .../rotary_embedding/linear_scaling_rope.py   |  115 +
 .../layers/rotary_embedding/llama3_rope.py    |   54 +
 .../rotary_embedding/llama4_vision_rope.py    |   74 +
 .../layers/rotary_embedding/mrope.py          |  670 ++++++
 .../rotary_embedding/ntk_scaling_rope.py      |   42 +
 .../phi3_long_rope_scaled_rope.py             |  129 ++
 .../rotary_embedding/yarn_scaling_rope.py     |   68 +
 15 files changed, 2111 insertions(+), 1967 deletions(-)
 delete mode 100644 vllm/model_executor/layers/rotary_embedding.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/__init__.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/base.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/common.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/dynamic_ntk_alpha_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/linear_scaling_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/llama3_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/mrope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/ntk_scaling_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/phi3_long_rope_scaled_rope.py
 create mode 100644 vllm/model_executor/layers/rotary_embedding/yarn_scaling_rope.py

diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py
deleted file mode 100644
index 24dd86620fe91..0000000000000
--- a/vllm/model_executor/layers/rotary_embedding.py
+++ /dev/null
@@ -1,1967 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# Adapted from
-# https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py
-# Copyright 2023 The vLLM team.
-# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
-#
-# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
-# and OPT implementations in this library. It has been modified from its
-# original forms to accommodate minor architectural differences compared
-# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Rotary Positional Embeddings."""
-import itertools
-import math
-from typing import Any, Optional, Union
-
-import numpy as np
-import torch
-import torch.nn as nn
-from transformers import PretrainedConfig
-
-from vllm.model_executor.custom_op import CustomOp
-from vllm.platforms import current_platform
-
-if current_platform.is_cuda():
-    from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb
-
-
-def _rotate_neox(x: torch.Tensor) -> torch.Tensor:
-    x1 = x[..., :x.shape[-1] // 2]
-    x2 = x[..., x.shape[-1] // 2:]
-    return torch.cat((-x2, x1), dim=-1)
-
-
-def _rotate_gptj(x: torch.Tensor) -> torch.Tensor:
-    x1 = x[..., ::2]
-    x2 = x[..., 1::2]
-    x = torch.stack((-x2, x1), dim=-1)
-    return x.flatten(-2)
-
-
-def _apply_rotary_emb_torch(
-    x: torch.Tensor,
-    cos: torch.Tensor,
-    sin: torch.Tensor,
-    is_neox_style: bool,
-) -> torch.Tensor:
-    cos = cos.unsqueeze(-2).to(x.dtype)
-    sin = sin.unsqueeze(-2).to(x.dtype)
-    if is_neox_style:
-        x1, x2 = torch.chunk(x, 2, dim=-1)
-    else:
-        x1 = x[..., ::2]
-        x2 = x[..., 1::2]
-    o1 = x1 * cos - x2 * sin
-    o2 = x2 * cos + x1 * sin
-    if is_neox_style:
-        return torch.cat((o1, o2), dim=-1)
-    else:
-        return torch.stack((o1, o2), dim=-1).flatten(-2)
-
-
-def _apply_rotary_emb(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor,
-                      is_neox_style: bool) -> torch.Tensor:
-    """
-    Args:
-        x: [num_tokens, num_heads, head_size]
-        cos: [num_tokens, head_size // 2]
-        sin: [num_tokens, head_size // 2]
-        is_neox_style: Whether to use the Neox-style or GPT-J-style rotary
-            positional embeddings.
-    """
-    if current_platform.is_cuda():
-        return apply_rotary_emb(x.unsqueeze(0), cos, sin,
-                                not is_neox_style).squeeze(0)
-    else:
-        return _apply_rotary_emb_torch(x, cos, sin, is_neox_style)
-
-
-@CustomOp.register("rotary_embedding")
-class RotaryEmbedding(CustomOp):
-    """Original rotary positional embedding."""
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        dtype: torch.dtype,
-    ) -> None:
-        super().__init__()
-        self.head_size = head_size
-        self.rotary_dim = rotary_dim
-        self.max_position_embeddings = max_position_embeddings
-        self.base = base
-        self.is_neox_style = is_neox_style
-        self.dtype = dtype
-
-        cache = self._compute_cos_sin_cache()
-        cache = cache.to(dtype)
-        self.cos_sin_cache: torch.Tensor
-        self.register_buffer("cos_sin_cache", cache, persistent=False)
-
-    def _compute_inv_freq(self, base: float) -> torch.Tensor:
-        """Compute the inverse frequency."""
-        # NOTE(woosuk): To exactly match the HF implementation, we need to
-        # use CPU to compute the cache and then move it to GPU. However, we
-        # create the cache on GPU for faster initialization. This may cause
-        # a slight numerical difference between the HF implementation and ours.
-        inv_freq = 1.0 / (base**(torch.arange(
-            0, self.rotary_dim, 2, dtype=torch.float) / self.rotary_dim))
-        return inv_freq
-
-    def _compute_cos_sin_cache(self) -> torch.Tensor:
-        """Compute the cos and sin cache."""
-        inv_freq = self._compute_inv_freq(self.base)
-        t = torch.arange(self.max_position_embeddings, dtype=torch.float)
-
-        freqs = torch.einsum("i,j -> ij", t, inv_freq)
-        cos = freqs.cos()
-        sin = freqs.sin()
-        cache = torch.cat((cos, sin), dim=-1)
-        return cache
-
-    def forward_native(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: Optional[torch.Tensor] = None,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-        """A PyTorch-native implementation of forward()."""
-        if offsets is not None:
-            positions = positions + offsets
-        positions = positions.flatten()
-        num_tokens = positions.shape[0]
-        cos_sin = self.cos_sin_cache.index_select(0, positions)
-        cos, sin = cos_sin.chunk(2, dim=-1)
-
-        query_shape = query.shape
-        query = query.view(num_tokens, -1, self.head_size)
-        query_rot = query[..., :self.rotary_dim]
-        query_pass = query[..., self.rotary_dim:]
-        query_rot = _apply_rotary_emb_torch(query_rot, cos, sin,
-                                            self.is_neox_style)
-        query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
-
-        # key may be None in some cases, e.g. cross-layer KV sharing
-        if key is not None:
-            key_shape = key.shape
-            key = key.view(num_tokens, -1, self.head_size)
-            key_rot = key[..., :self.rotary_dim]
-            key_pass = key[..., self.rotary_dim:]
-            key_rot = _apply_rotary_emb_torch(key_rot, cos, sin,
-                                              self.is_neox_style)
-            key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
-        return query, key
-
-    def forward_cuda(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: Optional[torch.Tensor] = None,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-        from vllm import _custom_ops as ops
-
-        # __setattr__ in nn.Module (called by `self.cos_sin_cache = ...`)
-        # is expensive, so avoid calling it if possible
-        if self.cos_sin_cache.device != query.device or \
-            self.cos_sin_cache.dtype != query.dtype:
-            self.cos_sin_cache = self.cos_sin_cache.to(query.device,
-                                                       dtype=query.dtype)
-
-        # ops.rotary_embedding()/batched_rotary_embedding()
-        # are in-place operations that update the query and key tensors.
-        if offsets is not None:
-            ops.batched_rotary_embedding(positions, query, key, self.head_size,
-                                         self.cos_sin_cache,
-                                         self.is_neox_style, self.rotary_dim,
-                                         offsets)
-        else:
-            ops.rotary_embedding(positions, query, key, self.head_size,
-                                 self.cos_sin_cache, self.is_neox_style)
-        return query, key
-
-    def forward_xpu(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: Optional[torch.Tensor] = None,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-        from vllm._ipex_ops import ipex_ops as ops
-
-        self.cos_sin_cache = self.cos_sin_cache.to(positions.device,
-                                                   dtype=query.dtype)
-        # ops.rotary_embedding()/batched_rotary_embedding()
-        # are in-place operations that update the query and key tensors.
-        if key is None:
-            # XPU kernel doesn't support key=None so fall back to native impl
-            # TODO(sarckk): add support for optional key in
-            # ipex.llm.functional.rotary_embedding_batched
-            return self.forward_native(positions, query, key, offsets)
-        else:
-            if offsets is not None:
-                ops.batched_rotary_embedding(positions, query, key,
-                                             self.head_size,
-                                             self.cos_sin_cache,
-                                             self.is_neox_style,
-                                             self.rotary_dim, offsets)
-            else:
-                ops.rotary_embedding(positions, query, key, self.head_size,
-                                     self.cos_sin_cache, self.is_neox_style)
-        return query, key
-
-    def forward_neuron(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: Optional[torch.Tensor] = None,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-
-        def _apply_rotary_emb_neuron(
-            x: torch.Tensor,
-            cos: torch.Tensor,
-            sin: torch.Tensor,
-            is_neox_style: bool,
-        ) -> torch.Tensor:
-            cos = cos.unsqueeze(-2).to(x.dtype)
-            sin = sin.unsqueeze(-2).to(x.dtype)
-            if is_neox_style:
-                x1, x2 = torch.chunk(x, 2, dim=-1)
-            else:
-                # x1 = x[..., ::2]
-
-                # x2 = x[..., 1::2]
-                d = x.shape[-1] // 2
-                x_reshaped = x.view(-1, x.shape[-1])
-                x1 = x_reshaped[:, ::2].view(*x.shape[:-1], d)
-                x2 = x_reshaped[:, 1::2].view(*x.shape[:-1], d)
-            o1 = x1 * cos - x2 * sin
-            o2 = x2 * cos + x1 * sin
-            if is_neox_style:
-                return torch.cat((o1, o2), dim=-1)
-            else:
-                return torch.stack((o1, o2), dim=-1).flatten(-2)
-
-        if offsets is not None:
-            positions = positions + offsets
-
-        self.cos_sin_cache = self.cos_sin_cache.to(query.device,
-                                                   dtype=query.dtype)
-
-        positions = positions.flatten()
-        num_tokens = positions.shape[0]
-        cos_sin = self.cos_sin_cache.index_select(0, positions)
-        cos, sin = cos_sin.chunk(2, dim=-1)
-
-        query_shape = query.shape
-        query = query.view(num_tokens, -1, self.head_size)
-        if key is not None:
-            key_shape = key.shape
-            key = key.view(num_tokens, -1, self.head_size)
-
-        if self.rotary_dim == self.head_size:
-            query = _apply_rotary_emb(query, cos, sin, self.is_neox_style)
-            query = query.reshape(query_shape)
-            if key is not None:
-                key = _apply_rotary_emb(key, cos, sin, self.is_neox_style)
-                key = key.reshape(key_shape)
-        else:
-            head_size = query.shape[-1]
-            query_reshaped = query.view(-1, head_size)
-            query_pass = query_reshaped[:, self.rotary_dim:].view(
-                *query.shape[:-1], head_size - self.rotary_dim)
-            query_rot = query_reshaped[:, :self.rotary_dim].view(
-                *query.shape[:-1], self.rotary_dim)
-            query_rot = _apply_rotary_emb_neuron(query_rot, cos, sin,
-                                                 self.is_neox_style)
-            query = torch.cat((query_rot, query_pass),
-                              dim=-1).reshape(query_shape)
-
-            if key is not None:
-                key_reshaped = key.view(-1, head_size)
-                key_pass = key_reshaped[:, self.rotary_dim:].view(
-                    *key.shape[:-1], head_size - self.rotary_dim)
-                key_rot = key_reshaped[:, :self.rotary_dim].view(
-                    *key.shape[:-1], self.rotary_dim)
-                key_rot = _apply_rotary_emb_neuron(key_rot, cos, sin,
-                                                   self.is_neox_style)
-                key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
-        return query, key
-
-    def extra_repr(self) -> str:
-        s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}"
-        s += f", max_position_embeddings={self.max_position_embeddings}"
-        s += f", base={self.base}, is_neox_style={self.is_neox_style}"
-        return s
-
-
-class LinearScalingRotaryEmbedding(RotaryEmbedding):
-    """RotaryEmbedding extended with linear scaling.
-
-    It supports multiple scaling factors. Since multiple LoRA adapters may have
-    different scaling factors, we need multiple cos/sin caches. In this way,
-    instead of running rotary embedding kernel per lora, we can run multiple
-    lora in a batched way.
-
-    In addition to that, we also keep the cos/sin cache for the scaling factor
-    of 1 (default) at all times.
-
-    Exemplary for two scaling factors x=1, y and z with embeddings
-    [[x11, x12, ... x1m], ..., [xn1, xn2, ..., xnm]] and
-    [[y11, y12, ... y1o], ..., [yn1, yn2, ..., yno]], and
-    [[z11, z12, ... z1p], ..., [zn1, zn2, ..., znp]],
-
-    we construct the cos/sin cache as follows:
-    [[x11, x12, ... x1m, y11, y12, ... y1o, z11, z12, ... z1p],
-        ...
-     [xn1, xn2, ... xnm, yn1, yn2, ... yno, zn1, zn2, ... znp]]
-
-    We then use offsets to index into the cos/sin cache for
-    the respective scaling factors.
-
-    The offset to cache can be accessed via `scaling_factor_to_offset` API.
-
-    Credits to the Reddit user /u/kaiokendev
-    """
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        scaling_factors: Union[list[float], float],
-        dtype: torch.dtype,
-    ) -> None:
-        if isinstance(scaling_factors, float):
-            scaling_factors = [scaling_factors]
-        self.scaling_factors: list[float] = scaling_factors  # noqa
-        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
-                         is_neox_style, dtype)
-        # Lazy initialized.
-        self._scaling_factor_to_offset: dict[float, int]
-
-    def _compute_cos_sin_cache(self) -> torch.Tensor:
-        inv_freq = self._compute_inv_freq(self.base)
-        cache_list: list[torch.Tensor] = []
-        # offsets to the next cache in a tensor.
-        # Each offset corresponds to the same index in scaling_factors.
-        offsets: list[int] = []
-        for scaling_factor in self.scaling_factors:
-            # NOTE(woosuk): self.max_position_embeddings is the original
-            # maximum length before applying the rope scaling.
-            # Thus, the maximum length after applying the rope scaling is
-            # self.max_position_embeddings * self.scaling_factor.
-            max_len = self.max_position_embeddings * scaling_factor
-            t = torch.arange(max_len, dtype=torch.float)
-            t = t / scaling_factor
-
-            freqs = torch.einsum("i,j -> ij", t, inv_freq)
-            cos = freqs.cos()
-            sin = freqs.sin()
-            cache = torch.cat((cos, sin), dim=-1)
-            if not cache_list:
-                offset = 0
-            else:
-                last_offset = offsets[-1]
-                next_max_len = cache_list[-1].shape[0]
-                offset = last_offset + next_max_len
-            offsets.append(offset)
-            cache_list.append(cache)
-        self._scaling_factor_to_offset = {
-            float(scaling_factor): offsets[i]
-            for i, scaling_factor in enumerate(self.scaling_factors)
-        }
-        assert len(self.scaling_factors) == len(offsets)
-        return torch.cat(cache_list, dim=0)
-
-    @property
-    def scaling_factor_to_offset(self) -> dict[float, int]:
-        return self._scaling_factor_to_offset
-
-
-class NTKScalingRotaryEmbedding(RotaryEmbedding):
-    """RotaryEmbedding extended with fixed and mixed NTK scaling.
-    https://kexue.fm/archives/9706 """
-
-    def __init__(self,
-                 head_size: int,
-                 rotary_dim: int,
-                 max_position_embeddings: int,
-                 base: float,
-                 is_neox_style: bool,
-                 scaling_factor: float,
-                 dtype: torch.dtype,
-                 mixed_b: Optional[float] = None) -> None:
-        self.scaling_factor = scaling_factor
-        self.mixed_b = mixed_b
-        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
-                         is_neox_style, dtype)
-
-    def _compute_inv_freq(self, base: float) -> torch.Tensor:
-        base = self.base * (self.scaling_factor if self.mixed_b is None else 1)
-        inv_freq = super()._compute_inv_freq(base)
-
-        if self.mixed_b is None:
-            inv_freq = inv_freq / self.scaling_factor**(2 / self.rotary_dim)
-        else:
-            a = torch.tensor(self.scaling_factor).log() / (self.rotary_dim /
-                                                           2)**self.mixed_b
-            lambda_1_m = (a * torch.arange(
-                1, self.rotary_dim // 2 + 1).float()**self.mixed_b).exp()
-            inv_freq = inv_freq / lambda_1_m
-
-        return inv_freq
-
-
-class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
-    """RotaryEmbedding extended with Dynamic NTK scaling.
-
-    Credits to the Reddit users /u/bloc97 and /u/emozilla
-    """
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        scaling_factor: float,
-        dtype: torch.dtype,
-    ) -> None:
-        self.scaling_factor = scaling_factor
-        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
-                         is_neox_style, dtype)
-
-    def _compute_cos_sin_cache(self) -> torch.Tensor:
-        # NOTE(woosuk): self.max_position_embeddings is the original
-        # maximum length before applying the rope scaling.
-        # Thus, the maximum length after applying the rope scaling is
-        # self.max_position_embeddings * self.scaling_factor.
-        max_len = self.max_position_embeddings * self.scaling_factor
-        base = self.base * (
-            (self.scaling_factor * max_len / self.max_position_embeddings) -
-            (self.scaling_factor - 1))**(self.rotary_dim /
-                                         (self.rotary_dim - 2))
-        inv_freq = self._compute_inv_freq(base)
-        t = torch.arange(max_len, dtype=torch.float)
-
-        freqs = torch.einsum("i,j -> ij", t, inv_freq)
-        cos = freqs.cos()
-        sin = freqs.sin()
-        cache = torch.cat((cos, sin), dim=-1)
-        return cache
-
-
-class DynamicNTKAlphaRotaryEmbedding(RotaryEmbedding):
-    """RotaryEmbedding extended with Dynamic NTK alpha.
-
-    Based on the original RotaryEmbedding implementation.
-    """
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        scaling_alpha: float,
-        dtype: torch.dtype,
-    ) -> None:
-        self.scaling_alpha = scaling_alpha
-        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
-                         is_neox_style, dtype)
-
-    def _compute_cos_sin_cache(self) -> torch.Tensor:
-        # For Hunyuan DynamicNTKAlphaRotaryEmbedding
-        max_len = self.max_position_embeddings
-        base = self.base * self.scaling_alpha**(self.rotary_dim /
-                                                (self.rotary_dim - 2))
-        inv_freq = self._compute_inv_freq(base)
-        t = torch.arange(max_len, dtype=torch.float)
-
-        freqs = torch.einsum("i,j -> ij", t, inv_freq)
-        cos = freqs.cos()
-        sin = freqs.sin()
-        cache = torch.cat((cos, sin), dim=-1)
-        return cache
-
-
-# Inverse dim formula to find dim based on number of rotations
-def _yarn_find_correction_dim(num_rotations: int,
-                              dim: int,
-                              base: float = 10000,
-                              max_position_embeddings: int = 2048) -> float:
-    return (dim * math.log(max_position_embeddings /
-                           (num_rotations * 2 * math.pi))) / (2 *
-                                                              math.log(base))
-
-
-# Find dim range bounds based on rotations
-def _yarn_find_correction_range(
-        low_rot: int,
-        high_rot: int,
-        dim: int,
-        base: float = 10000,
-        max_position_embeddings: int = 2048) -> tuple[int, int]:
-    low = math.floor(
-        _yarn_find_correction_dim(low_rot, dim, base, max_position_embeddings))
-    high = math.ceil(
-        _yarn_find_correction_dim(high_rot, dim, base,
-                                  max_position_embeddings))
-    return max(low, 0), min(high, dim - 1)  # Clamp values just in case
-
-
-def _yarn_linear_ramp_mask(low: float, high: float, dim: int,
-                           dtype: torch.dtype) -> torch.Tensor:
-    if low == high:
-        high += 0.001  # Prevent singularity
-
-    linear_func = (torch.arange(dim, dtype=dtype) - low) / (high - low)
-    ramp_func = torch.clamp(linear_func, 0, 1)
-    return ramp_func
-
-
-def _yarn_get_mscale(scale: float = 1) -> float:
-    if scale <= 1:
-        return 1.0
-    return 0.1 * math.log(scale) + 1.0
-
-
-class YaRNScalingRotaryEmbedding(RotaryEmbedding):
-    """RotaryEmbedding extended with YaRN method.
-
-    Credits to Peng et al. github.com/jquesnelle/yarn
-    """
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        scaling_factor: float,
-        dtype: torch.dtype,
-        *,
-        extrapolation_factor: float = 1,
-        attn_factor: float = 1,
-        beta_fast: int = 32,
-        beta_slow: int = 1,
-    ) -> None:
-        self.scaling_factor = scaling_factor
-        self.extrapolation_factor = extrapolation_factor
-        self.attn_factor = attn_factor
-        self.beta_fast = beta_fast
-        self.beta_slow = beta_slow
-        # Get n-d magnitude scaling corrected for interpolation
-        self.mscale = float(
-            _yarn_get_mscale(self.scaling_factor) * attn_factor)
-        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
-                         is_neox_style, dtype)
-
-    def _compute_inv_freq(self, scaling_factor: float) -> torch.Tensor:
-        pos_freqs = self.base**(
-            torch.arange(0, self.rotary_dim, 2, dtype=torch.float) /
-            self.rotary_dim)
-        inv_freq_extrapolation = 1.0 / pos_freqs
-        inv_freq_interpolation = 1.0 / (scaling_factor * pos_freqs)
-
-        low, high = _yarn_find_correction_range(self.beta_fast, self.beta_slow,
-                                                self.rotary_dim, self.base,
-                                                self.max_position_embeddings)
-        # Get n-d rotational scaling corrected for extrapolation
-        inv_freq_mask = (1 - _yarn_linear_ramp_mask(
-            low, high, self.rotary_dim // 2,
-            dtype=torch.float)) * self.extrapolation_factor
-        inv_freq = inv_freq_interpolation * (
-            1 - inv_freq_mask) + inv_freq_extrapolation * inv_freq_mask
-        return inv_freq
-
-    def _compute_cos_sin_cache(self) -> torch.Tensor:
-        inv_freq = self._compute_inv_freq(self.scaling_factor)
-        t = torch.arange(self.max_position_embeddings * self.scaling_factor,
-                         dtype=torch.float32)
-        freqs = torch.einsum("i,j -> ij", t, inv_freq)
-        cos = (freqs.cos() * self.mscale)
-        sin = (freqs.sin() * self.mscale)
-        cache = torch.cat((cos, sin), dim=-1)
-        return cache
-
-
-class Phi3LongRoPEScaledRotaryEmbedding(nn.Module):
-    """Phi3 family of models scaled rotary embedding.
-
-    Based on the original RotaryEmbedding implementation.
-    """
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        original_max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        dtype: torch.dtype,
-        short_factor: list[float],
-        long_factor: list[float],
-        short_mscale: Optional[float] = None,
-        long_mscale: Optional[float] = None,
-    ):
-        super().__init__()
-
-        if is_neox_style is False:
-            raise ValueError(
-                "`Phi3LongRoPEScaledRotaryEmbedding` only supports neox_style."
-            )
-
-        self.rotary_dim = rotary_dim
-        self.head_size = head_size
-        self.max_position_embeddings = max_position_embeddings
-        self.original_max_position_embeddings = original_max_position_embeddings
-        self.base = base
-        self.short_factor = short_factor
-        self.long_factor = long_factor
-
-        scale = self.max_position_embeddings / \
-                self.original_max_position_embeddings
-        if scale <= 1.0:
-            scaling_factor = 1.0
-        else:
-            scaling_factor = math.sqrt(
-                1 + math.log(scale) /
-                math.log(self.original_max_position_embeddings))
-        if short_mscale is None:
-            short_mscale = scaling_factor
-        if long_mscale is None:
-            long_mscale = scaling_factor
-
-        self.short_mscale = short_mscale
-        self.long_mscale = long_mscale
-
-        short_cache = self._compute_cos_sin_cache(
-            original_max_position_embeddings, short_factor, short_mscale)
-        short_cache = short_cache.to(dtype)
-
-        long_cache = self._compute_cos_sin_cache(max_position_embeddings,
-                                                 long_factor, long_mscale)
-        long_cache = long_cache.to(dtype)
-
-        long_short_cache = torch.cat([short_cache, long_cache], dim=0)
-        self.register_buffer("long_short_cos_sin_cache",
-                             long_short_cache,
-                             persistent=False)
-
-    def _compute_inv_freq(self, rescale_factors: list[float]) -> torch.Tensor:
-        rescale_factors = torch.tensor(rescale_factors, dtype=torch.float32)
-        inv_freq = 1.0 / (rescale_factors * (self.base**(torch.arange(
-            0, self.rotary_dim, 2, dtype=torch.float) / self.rotary_dim)))
-        return inv_freq
-
-    def _compute_cos_sin_cache(
-        self,
-        max_position_embeddings: int,
-        rescale_factors: list[float],
-        mscale: float,
-    ) -> torch.Tensor:
-        inv_freq = self._compute_inv_freq(rescale_factors)
-        t = torch.arange(max_position_embeddings, dtype=torch.float)
-        freqs = torch.einsum("i,j -> ij", t, inv_freq)
-        cos = freqs.cos() * mscale
-        sin = freqs.sin() * mscale
-        cache = torch.cat((cos, sin), dim=-1)
-        return cache
-
-    def forward(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: Optional[torch.Tensor] = None,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-        assert key is not None
-        query = query.view(*query.shape[:-1], -1, self.head_size)
-        key = key.view(*key.shape[:-1], -1, self.head_size)
-
-        k = self.original_max_position_embeddings
-        long_prompt_offset = (torch.any(positions > k).float() *
-                              torch.full_like(positions, k)).long()
-        idx = (torch.add(positions, long_prompt_offset)
-               if long_prompt_offset is not None else positions)
-        idx = torch.add(idx, offsets) if offsets is not None else idx
-        cos_sin = torch.index_select(self.long_short_cos_sin_cache, 0, idx)
-
-        cos, sin = cos_sin.chunk(2, dim=-1)
-        cos = cos.repeat(1, 2).unsqueeze(-2)
-        sin = sin.repeat(1, 2).unsqueeze(-2)
-
-        query_rot = query[..., :self.rotary_dim]
-        query_pass = query[..., self.rotary_dim:]
-        query_rot = query_rot * cos + _rotate_neox(query_rot) * sin
-        query = torch.cat((query_rot, query_pass), dim=-1)
-
-        key_rot = key[..., :self.rotary_dim]
-        key_pass = key[..., self.rotary_dim:]
-        key_rot = key_rot * cos + _rotate_neox(key_rot) * sin
-        key = torch.cat((key_rot, key_pass), dim=-1)
-
-        return query.flatten(-2), key.flatten(-2)
-
-
-def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float:
-    if scale <= 1:
-        return 1.0
-    return 0.1 * mscale * math.log(scale) + 1.0
-
-
-class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
-    """RotaryEmbedding extended with YaRN method.
-
-    Credits to Peng et al. github.com/jquesnelle/yarn
-    """
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        scaling_factor: float,
-        dtype: torch.dtype,
-        *,
-        extrapolation_factor: float = 1,
-        attn_factor: float = 1,
-        beta_fast: int = 32,
-        beta_slow: int = 1,
-        mscale: float = 1,
-        mscale_all_dim: float = 0,
-    ) -> None:
-        self.scaling_factor = scaling_factor
-        self.extrapolation_factor = extrapolation_factor
-        self.attn_factor = attn_factor
-        self.beta_fast = beta_fast
-        self.beta_slow = beta_slow
-        # Get n-d magnitude scaling corrected for interpolation.
-        self.mscale = float(
-            yarn_get_mscale(self.scaling_factor, float(mscale)) /
-            yarn_get_mscale(self.scaling_factor, float(mscale_all_dim)) *
-            attn_factor)
-        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
-                         is_neox_style, dtype)
-
-    def _compute_inv_freq(self, scaling_factor: float) -> torch.Tensor:
-        pos_freqs = self.base**(
-            torch.arange(0,
-                         self.rotary_dim,
-                         2,
-                         dtype=torch.float,
-                         device=current_platform.device_type) /
-            self.rotary_dim)
-        inv_freq_extrapolation = 1.0 / pos_freqs
-        inv_freq_interpolation = 1.0 / (scaling_factor * pos_freqs)
-
-        low, high = _yarn_find_correction_range(self.beta_fast, self.beta_slow,
-                                                self.rotary_dim, self.base,
-                                                self.max_position_embeddings)
-        # Get n-d rotational scaling corrected for extrapolation
-        inv_freq_mask = (1 - _yarn_linear_ramp_mask(
-            low, high, self.rotary_dim // 2,
-            dtype=torch.float)) * self.extrapolation_factor
-        inv_freq = inv_freq_interpolation * (
-            1 - inv_freq_mask) + inv_freq_extrapolation * inv_freq_mask
-        return inv_freq
-
-    def _compute_cos_sin_cache(self) -> torch.Tensor:
-        inv_freq = self._compute_inv_freq(self.scaling_factor)
-        t = torch.arange(self.max_position_embeddings * self.scaling_factor,
-                         device=current_platform.device_type,
-                         dtype=torch.float32)
-        freqs = torch.einsum("i,j -> ij", t, inv_freq)
-        cos = (freqs.cos() * self.mscale)
-        sin = (freqs.sin() * self.mscale)
-        cache = torch.cat((cos, sin), dim=-1)
-        return cache
-
-    def forward(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: Optional[torch.Tensor] = None,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-        """PyTorch-native implementation equivalent to forward()."""
-        assert key is not None
-        query_rot = query[..., :self.rotary_dim]
-        key_rot = key[..., :self.rotary_dim]
-        if self.rotary_dim < self.head_size:
-            query_pass = query[..., self.rotary_dim:]
-            key_pass = key[..., self.rotary_dim:]
-
-        if self.cos_sin_cache.device != positions.device:
-            self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(
-                positions.device)
-        cos_sin = self.cos_sin_cache[torch.add(positions, offsets)
-                                     if offsets is not None else positions]
-        cos, sin = cos_sin.chunk(2, dim=-1)
-        if self.is_neox_style:
-            # NOTE(woosuk): Here we assume that the positions tensor has the
-            # shape [batch_size, seq_len].
-            cos = cos.repeat(1, 1, 2).unsqueeze(-2)
-            sin = sin.repeat(1, 1, 2).unsqueeze(-2)
-        else:
-            cos = cos.repeat_interleave(2, dim=-1).unsqueeze(-2)
-            sin = sin.repeat_interleave(2, dim=-1).unsqueeze(-2)
-
-        rotate_fn = _rotate_neox if self.is_neox_style else _rotate_gptj
-        query_rot = query_rot * cos + rotate_fn(query_rot) * sin
-        key_rot = key_rot * cos + rotate_fn(key_rot) * sin
-
-        if self.rotary_dim < self.head_size:
-            query = torch.cat((query_rot, query_pass), dim=-1)
-            key = torch.cat((key_rot, key_pass), dim=-1)
-        else:
-            query = query_rot
-            key = key_rot
-        return query, key
-
-
-class Llama3RotaryEmbedding(RotaryEmbedding):
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        dtype: torch.dtype,
-        scaling_factor: float,
-        low_freq_factor: float,
-        high_freq_factor: float,
-        orig_max_position: int,
-    ) -> None:
-        self.scaling_factor = scaling_factor
-        self.low_freq_factor = low_freq_factor
-        self.high_freq_factor = high_freq_factor
-        self.orig_max_position = orig_max_position
-        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
-                         is_neox_style, dtype)
-
-    def _compute_inv_freq(self, base: float) -> torch.Tensor:
-        inv_freqs = super()._compute_inv_freq(base)
-        low_freq_wavelen = self.orig_max_position / self.low_freq_factor
-        high_freq_wavelen = self.orig_max_position / self.high_freq_factor
-
-        wave_len = 2 * math.pi / inv_freqs
-        if self.low_freq_factor != self.high_freq_factor:
-            smooth = (self.orig_max_position / wave_len - self.low_freq_factor
-                      ) / (self.high_freq_factor - self.low_freq_factor)
-        else:
-            smooth = 0
-        new_freqs = torch.where(
-            wave_len < high_freq_wavelen,
-            inv_freqs,
-            torch.where(
-                wave_len > low_freq_wavelen,
-                inv_freqs / self.scaling_factor,
-                (1 - smooth) * inv_freqs / self.scaling_factor +
-                smooth * inv_freqs,
-            ),
-        )
-        return new_freqs
-
-
-class Llama4VisionRotaryEmbedding(RotaryEmbedding):
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        dtype: torch.dtype,
-    ):
-        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
-                         is_neox_style, dtype)
-
-    def _compute_inv_freq(self, base: float) -> torch.Tensor:
-        inv_freqs = super()._compute_inv_freq(base)
-        inv_freqs = inv_freqs[:(self.rotary_dim // 2)]
-        return inv_freqs
-
-    def _compute_cos_sin_cache(self) -> torch.Tensor:
-        inv_freq = self._compute_inv_freq(self.base)
-
-        # self.max_position_embeddings here is number of image patches
-        # i.e. (image_size // patch_size) ** 2
-        num_patches = self.max_position_embeddings
-        img_idx = torch.arange(num_patches,
-                    dtype=torch.int32) \
-                    .reshape(num_patches, 1)
-        img_idx = torch.cat([img_idx, img_idx[:1]], dim=0)
-        img_idx[-1, -1] = -2  # set to ID_CLS_TOKEN
-        num_patches_single_dim = int(math.sqrt(num_patches))
-        frequencies_x = img_idx % num_patches_single_dim
-        frequencies_y = img_idx // num_patches_single_dim
-        freqs_x = ((frequencies_x + 1)[..., None] *
-                   inv_freq[None, None, :]).repeat_interleave(2, dim=-1)
-        freqs_y = ((frequencies_y + 1)[..., None] *
-                   inv_freq[None, None, :]).repeat_interleave(2, dim=-1)
-        freqs = torch.cat([freqs_x, freqs_y],
-                          dim=-1).float().contiguous()[..., ::2]
-        freqs = freqs.masked_fill(img_idx.reshape(-1, 1, 1) < 0, 0)
-        cache = torch.view_as_complex(
-            torch.stack([torch.cos(freqs), torch.sin(freqs)], dim=-1))
-        return cache
-
-    def forward(
-        self,
-        query: torch.Tensor,
-        key: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-        assert key is not None
-        self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(query.device)
-        query_ = torch.view_as_complex(query.float().reshape(
-            *query.shape[:-1], -1, 2))
-        key_ = torch.view_as_complex(key.float().reshape(
-            *key.shape[:-1], -1, 2))
-        broadcast_shape = [
-            d if i == 1 or i == (query_.ndim - 1) else 1
-            for i, d in enumerate(query_.shape)
-        ]
-        freqs_ci = self.cos_sin_cache.view(*broadcast_shape)
-        query_out = torch.view_as_real(query_ * freqs_ci).flatten(3)
-        key_out = torch.view_as_real(key_ * freqs_ci).flatten(3)
-        return query_out.type_as(query), key_out.type_as(key)
-
-
-class MRotaryEmbedding(RotaryEmbedding):
-    """Rotary Embedding with Multimodal Sections."""
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        dtype: torch.dtype,
-        mrope_section: Optional[list[int]] = None,
-    ) -> None:
-        # In Qwen2.5-VL, the maximum index value is related to the duration of
-        # the input video. We enlarge max_position_embeddings to 4 times to get
-        # a larger the cos and sin cache.
-        self.cache_max_position_num = max_position_embeddings * 4
-        super().__init__(head_size, rotary_dim, self.cache_max_position_num,
-                         base, is_neox_style, dtype)
-
-        self.mrope_section = mrope_section
-        if self.mrope_section:
-            assert sum(self.mrope_section) == rotary_dim // 2
-
-    def forward(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-        """PyTorch-native implementation equivalent to forward().
-
-        Args:
-            positions:
-                [num_tokens,] (text only) or
-                [3, num_tokens] (T/H/W positions with multimodal inputs)
-            query: [num_tokens, num_heads * head_size]
-            key: [num_tokens, num_kv_heads * head_size]
-        """
-        assert positions.ndim == 1 or positions.ndim == 2
-        assert key is not None
-
-        num_tokens = positions.shape[-1]
-        cos_sin = self.cos_sin_cache[positions]
-        cos, sin = cos_sin.chunk(2, dim=-1)
-        if positions.ndim == 2:
-            assert self.mrope_section
-
-            cos = torch.cat([
-                m[i]
-                for i, m in enumerate(cos.split(self.mrope_section, dim=-1))
-            ],
-                            dim=-1)
-            sin = torch.cat([
-                m[i]
-                for i, m in enumerate(sin.split(self.mrope_section, dim=-1))
-            ],
-                            dim=-1)
-
-        query_shape = query.shape
-        query = query.view(num_tokens, -1, self.head_size)
-        query_rot = query[..., :self.rotary_dim]
-        query_pass = query[..., self.rotary_dim:]
-        query_rot = _apply_rotary_emb(query_rot, cos, sin, self.is_neox_style)
-        query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
-
-        key_shape = key.shape
-        key = key.view(num_tokens, -1, self.head_size)
-        key_rot = key[..., :self.rotary_dim]
-        key_pass = key[..., self.rotary_dim:]
-        key_rot = _apply_rotary_emb(key_rot, cos, sin, self.is_neox_style)
-        key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
-        return query, key
-
-    @classmethod
-    def get_input_positions(
-        cls,
-        input_tokens: list[int],
-        hf_config: PretrainedConfig,
-        image_grid_thw: Optional[Union[list[list[int]], torch.Tensor]],
-        video_grid_thw: Optional[Union[list[list[int]], torch.Tensor]],
-        second_per_grid_ts: Optional[list[float]],
-        context_len: int = 0,
-        seq_len: Optional[int] = None,
-        audio_feature_lengths: Optional[torch.Tensor] = None,
-        use_audio_in_video: bool = False,
-    ) -> tuple[list[list[int]], int]:
-        """Get mrope input positions and delta value."""
-
-        image_grid_thw = [] if image_grid_thw is None else image_grid_thw
-        video_grid_thw = [] if video_grid_thw is None else video_grid_thw
-        second_per_grid_ts = [] if second_per_grid_ts is None else \
-            second_per_grid_ts
-
-        llm_positions, mrope_position_delta = \
-            cls.get_input_positions_tensor(
-                input_tokens=input_tokens,
-                hf_config=hf_config,
-                image_grid_thw=image_grid_thw,
-                video_grid_thw=video_grid_thw,
-                second_per_grid_ts=second_per_grid_ts,
-                context_len=context_len,
-                seq_len=seq_len,
-                audio_feature_lengths=audio_feature_lengths,
-                use_audio_in_video=use_audio_in_video,
-            )
-
-        return llm_positions.tolist(), mrope_position_delta
-
-    @classmethod
-    def get_input_positions_tensor(
-        cls,
-        input_tokens: list[int],
-        hf_config: PretrainedConfig,
-        image_grid_thw: Union[list[list[int]], torch.Tensor],
-        video_grid_thw: Union[list[list[int]], torch.Tensor],
-        second_per_grid_ts: list[float],
-        context_len: int = 0,
-        seq_len: Optional[int] = None,
-        audio_feature_lengths: Optional[torch.Tensor] = None,
-        use_audio_in_video: bool = False,
-    ) -> tuple[torch.Tensor, int]:
-        from vllm.transformers_utils.config import thinker_uses_mrope
-        if thinker_uses_mrope(hf_config):
-            return cls._omni_get_input_positions_tensor(
-                input_tokens=input_tokens,
-                hf_config=hf_config,
-                image_grid_thw=image_grid_thw,
-                video_grid_thw=video_grid_thw,
-                second_per_grid_ts=second_per_grid_ts,
-                context_len=context_len,
-                seq_len=seq_len,
-                audio_feature_lengths=audio_feature_lengths,
-                use_audio_in_video=use_audio_in_video,
-            )
-        elif hf_config.model_type in ["glm4v", "glm4v_moe"]:
-            return cls._glm4v_get_input_positions_tensor(
-                input_tokens=input_tokens,
-                hf_config=hf_config,
-                image_grid_thw=image_grid_thw,
-                video_grid_thw=video_grid_thw,
-                context_len=context_len,
-                seq_len=seq_len,
-            )
-        else:
-            return cls._vl_get_input_positions_tensor(
-                input_tokens=input_tokens,
-                hf_config=hf_config,
-                image_grid_thw=image_grid_thw,
-                video_grid_thw=video_grid_thw,
-                second_per_grid_ts=second_per_grid_ts,
-                context_len=context_len,
-                seq_len=seq_len,
-            )
-
-    @classmethod
-    def _glm4v_get_input_positions_tensor(
-        cls,
-        input_tokens: list[int],
-        hf_config: PretrainedConfig,
-        image_grid_thw: Union[list[list[int]], torch.Tensor],
-        video_grid_thw: Union[list[list[int]], torch.Tensor],
-        context_len: int = 0,
-        seq_len: Optional[int] = None,
-    ) -> tuple[torch.Tensor, int]:
-        """Get mrope input positions and delta value for GLM4V."""
-
-        image_token_id = hf_config.image_token_id
-        video_start_token_id = hf_config.video_start_token_id
-        video_end_token_id = hf_config.video_end_token_id
-        spatial_merge_size = hf_config.vision_config.spatial_merge_size
-        llm_pos_ids_list: list = []
-
-        if not (image_grid_thw is None and video_grid_thw is None):
-            if isinstance(image_grid_thw, torch.Tensor):
-                image_grid_thw = image_grid_thw.tolist()
-
-            input_token_type: list[str] = []
-            video_check_flg = False
-            for token in input_tokens:
-                if token == video_start_token_id:
-                    video_check_flg = True
-                elif token == video_end_token_id:
-                    video_check_flg = False
-
-                if (token == image_token_id) and (video_check_flg is False):
-                    input_token_type.append("image")
-                elif (token == image_token_id) and (video_check_flg is True):
-                    input_token_type.append("video")
-                else:
-                    input_token_type.append("text")
-
-            input_type_group: list[tuple[str, int, int]] = []
-            for key, group_iter in itertools.groupby(
-                    enumerate(input_token_type), lambda x: x[1]):
-                group_list = list(group_iter)
-                start_index = group_list[0][0]
-                end_index = group_list[-1][0] + 1
-                input_type_group.append((key, start_index, end_index))
-
-            video_frame_num = 1
-            mm_data_idx = 0
-            for modality_type, start_idx, end_idx in input_type_group:
-                st_idx = llm_pos_ids_list[-1].max() + 1 if len(
-                    llm_pos_ids_list) > 0 else 0
-                if modality_type == "image":
-                    t, h, w = (
-                        image_grid_thw[mm_data_idx][0],
-                        image_grid_thw[mm_data_idx][1],
-                        image_grid_thw[mm_data_idx][2],
-                    )
-                    llm_grid_t, llm_grid_h, llm_grid_w = \
-                        t, h // spatial_merge_size, w // spatial_merge_size
-
-                    t_index = torch.arange(llm_grid_t).view(-1, 1).expand(
-                        -1, llm_grid_h * llm_grid_w).flatten()
-                    h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand(
-                        llm_grid_t, -1, llm_grid_w).flatten()
-                    w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand(
-                        llm_grid_t, llm_grid_h, -1).flatten()
-                    llm_pos_ids_list.append(
-                        torch.stack([t_index, h_index, w_index]) + st_idx)
-                    mm_data_idx += 1
-
-                elif modality_type == "video":
-                    t, h, w = (
-                        video_frame_num,
-                        image_grid_thw[mm_data_idx][1],
-                        image_grid_thw[mm_data_idx][2],
-                    )
-                    llm_grid_t, llm_grid_h, llm_grid_w = \
-                        t, h // spatial_merge_size, w // spatial_merge_size
-
-                    for t_idx in range(llm_grid_t):
-                        t_index = torch.tensor(t_idx).view(-1, 1).expand(
-                            -1, llm_grid_h * llm_grid_w).flatten()
-                        h_index = torch.arange(llm_grid_h).view(
-                            1, -1, 1).expand(1, -1, llm_grid_w).flatten()
-                        w_index = torch.arange(llm_grid_w).view(
-                            1, 1, -1).expand(1, llm_grid_h, -1).flatten()
-                        llm_pos_ids_list.append(
-                            torch.stack([t_index, h_index, w_index]) + st_idx)
-
-                    mm_data_idx += 1
-                    video_frame_num += 1
-
-                else:
-                    text_len = end_idx - start_idx
-                    llm_pos_ids_list.append(
-                        torch.arange(text_len).view(1, -1).expand(3, -1) +
-                        st_idx)
-                    video_frame_num = 1
-
-        else:
-            text_len = len(input_tokens)
-            llm_pos_ids_list.append(
-                torch.arange(text_len).view(1, -1).expand(3, -1))
-
-        llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
-        llm_positions = llm_positions[:, context_len:seq_len]
-        mrope_position_delta = (llm_positions.max() + 1 -
-                                len(input_tokens)).item()
-        return llm_positions, mrope_position_delta
-
-    @classmethod
-    def _vl_get_input_positions_tensor(
-        cls,
-        input_tokens: list[int],
-        hf_config: PretrainedConfig,
-        image_grid_thw: Union[list[list[int]], torch.Tensor],
-        video_grid_thw: Union[list[list[int]], torch.Tensor],
-        second_per_grid_ts: list[float],
-        context_len: int = 0,
-        seq_len: Optional[int] = None,
-    ) -> tuple[torch.Tensor, int]:
-        """Get mrope input positions and delta value."""
-
-        image_token_id = hf_config.image_token_id
-        video_token_id = hf_config.video_token_id
-        vision_start_token_id = hf_config.vision_start_token_id
-        spatial_merge_size = hf_config.vision_config.spatial_merge_size
-        tokens_per_second = getattr(hf_config.vision_config,
-                                    "tokens_per_second", 1.0)
-
-        input_tokens_tensor = torch.tensor(input_tokens)
-        vision_start_indices = torch.argwhere(
-            input_tokens_tensor == vision_start_token_id).squeeze(1)
-        vision_tokens = input_tokens_tensor[vision_start_indices + 1]
-        image_nums = (vision_tokens == image_token_id).sum()
-        video_nums = (vision_tokens == video_token_id).sum()
-        llm_pos_ids_list: list = []
-
-        st = 0
-        remain_images, remain_videos = image_nums, video_nums
-
-        image_index, video_index = 0, 0
-        for _ in range(image_nums + video_nums):
-            video_second_per_grid_t = 0.0
-            if image_token_id in input_tokens and remain_images > 0:
-                ed_image = input_tokens.index(image_token_id, st)
-            else:
-                ed_image = len(input_tokens) + 1
-            if video_token_id in input_tokens and remain_videos > 0:
-                ed_video = input_tokens.index(video_token_id, st)
-            else:
-                ed_video = len(input_tokens) + 1
-            if ed_image < ed_video:
-                t, h, w = (
-                    image_grid_thw[image_index][0],
-                    image_grid_thw[image_index][1],
-                    image_grid_thw[image_index][2],
-                )
-                image_index += 1
-                remain_images -= 1
-                ed = ed_image
-            else:
-                t, h, w = (
-                    video_grid_thw[video_index][0],
-                    video_grid_thw[video_index][1],
-                    video_grid_thw[video_index][2],
-                )
-                video_second_per_grid_t = 1.0
-                if second_per_grid_ts:
-                    video_second_per_grid_t = second_per_grid_ts[video_index]
-                video_index += 1
-                remain_videos -= 1
-                ed = ed_video
-
-            llm_grid_t, llm_grid_h, llm_grid_w = \
-                t, h // spatial_merge_size, w // spatial_merge_size
-            text_len = ed - st
-
-            st_idx = llm_pos_ids_list[-1].max() + 1 if len(
-                llm_pos_ids_list) > 0 else 0
-            llm_pos_ids_list.append(
-                torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx)
-
-            t_index = (torch.arange(llm_grid_t).view(-1, 1).expand(
-                -1, llm_grid_h * llm_grid_w) * video_second_per_grid_t *
-                       tokens_per_second).long().flatten()
-
-            h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand(
-                llm_grid_t, -1, llm_grid_w).flatten()
-            w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand(
-                llm_grid_t, llm_grid_h, -1).flatten()
-            llm_pos_ids_list.append(
-                torch.stack([t_index, h_index, w_index]) + text_len + st_idx)
-            st = ed + llm_grid_t * llm_grid_h * llm_grid_w
-
-        if st < len(input_tokens):
-            st_idx = llm_pos_ids_list[-1].max() + 1 if len(
-                llm_pos_ids_list) > 0 else 0
-            text_len = len(input_tokens) - st
-            llm_pos_ids_list.append(
-                torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx)
-
-        llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
-        mrope_position_delta = (llm_positions.max() + 1 -
-                                len(input_tokens)).item()
-        llm_positions = llm_positions[:, context_len:seq_len]
-
-        return llm_positions, mrope_position_delta
-
-    @classmethod
-    def _omni_get_input_positions_tensor(
-        cls,
-        input_tokens: list[int],
-        hf_config: PretrainedConfig,
-        image_grid_thw: Union[list[list[int]], torch.Tensor],
-        video_grid_thw: Union[list[list[int]], torch.Tensor],
-        second_per_grid_ts: Optional[list[float]] = None,
-        context_len: int = 0,
-        seq_len: Optional[int] = None,
-        audio_feature_lengths: Optional[torch.Tensor] = None,
-        use_audio_in_video: bool = False,
-    ) -> tuple[torch.Tensor, int]:
-        """Get mrope input positions and delta value (Qwen2.5-Omni version).
-
-        Differences from MRotaryEmbedding:
-            1. Add audio support (and related `audio_feature_lengths`).
-            2. Add `use_audio_in_video` option to read audio from video inputs.
-                In this case, audio and vision position ids will be split into
-                chunks and interleaved.
-
-        Example:
-
-            (V_i are vision position ids, A_i are audio position ids)
-
-            |V_1 ...    V_n|A_1 ...   A_n|V_n+1 ... V_2n|A_n+1 ... A_2n|...
-            |vision chunk 1|audio chunk 1|vision chunk 2|audio chunk 2 |...
-        """
-
-        # TODO(fyabc): refactor and share more code with
-        #  _vl_get_input_positions_tensor.
-
-        thinker_config = hf_config.thinker_config
-        audio_token_id = thinker_config.audio_token_index
-        image_token_id = thinker_config.image_token_index
-        video_token_id = thinker_config.video_token_index
-        audio_start_token_id = thinker_config.audio_start_token_id
-        audio_end_token_id = thinker_config.audio_end_token_id
-        vision_start_token_id = thinker_config.vision_start_token_id
-        vision_end_token_id = thinker_config.vision_end_token_id
-        seconds_per_chunk = thinker_config.seconds_per_chunk
-        spatial_merge_size = thinker_config.vision_config.spatial_merge_size
-        tokens_per_second = getattr(thinker_config.vision_config,
-                                    "tokens_per_second", 25)
-
-        if isinstance(image_grid_thw, list):
-            image_grid_thw = torch.tensor(image_grid_thw)
-        if isinstance(video_grid_thw, list):
-            video_grid_thw = torch.tensor(video_grid_thw)
-
-        src_item = input_tokens
-        audio_seqlens = audio_feature_lengths
-        if not second_per_grid_ts:
-            second_per_grid_ts = [1] * video_grid_thw.shape[0]
-        audio_idx = 0
-        video_idx = 0
-        image_idx = 0
-        new_src_item: list[int] = []
-        llm_pos_ids_list: list[torch.Tensor] = []
-
-        idx = 0
-        while idx < len(src_item):
-            new_src_item_len = len(new_src_item)
-            start_idx = llm_pos_ids_list[-1].max() + 1 if len(
-                llm_pos_ids_list) > 0 else 0
-            if src_item[idx] not in [
-                    audio_token_id, video_token_id, image_token_id
-            ]:
-                if use_audio_in_video and idx > 0:
-                    if src_item[idx] == vision_end_token_id and \
-                        src_item[idx - 1] == audio_end_token_id:
-                        # processing the <|audio_eos|> before <|vision_eos|>
-                        start_idx -= 1
-                    elif src_item[idx] == audio_start_token_id and \
-                        src_item[idx - 1] == vision_start_token_id:
-                        # processing the <|audio_bos|> after <|vision_eos|>
-                        start_idx -= 1
-                new_src_item.append(src_item[idx])
-                llm_pos_ids = torch.tensor([start_idx],
-                                           dtype=torch.long).expand(3, -1)
-                llm_pos_ids_list.append(llm_pos_ids)
-            elif src_item[idx] == audio_token_id:
-                assert audio_seqlens is not None
-                audio_seqlen = audio_seqlens[audio_idx]
-                place_num = (((audio_seqlen - 1) // 2 + 1 - 2) // 2 + 1)
-                new_src_item.extend([audio_token_id] * place_num)
-                llm_pos_ids = torch.arange(place_num).expand(3, -1) + start_idx
-                llm_pos_ids_list.append(llm_pos_ids)
-                audio_idx += 1
-            elif src_item[idx] == image_token_id:
-                grid_t = image_grid_thw[image_idx][0]
-                grid_hs = image_grid_thw[:, 1]
-                grid_ws = image_grid_thw[:, 2]
-                t_index = (torch.arange(grid_t) * 1 * tokens_per_second).long()
-                llm_pos_ids = cls._get_llm_pos_ids_for_vision(
-                    start_idx, image_idx, spatial_merge_size, t_index, grid_hs,
-                    grid_ws)
-                llm_pos_ids_list.append(llm_pos_ids)
-                vision_seqlen = image_grid_thw[image_idx].prod() // (
-                    spatial_merge_size**2)
-                new_src_item.extend([image_token_id] * vision_seqlen)
-                image_idx += 1
-            elif src_item[idx] == video_token_id and not use_audio_in_video:
-                grid_t = video_grid_thw[video_idx][0]
-                grid_hs = video_grid_thw[:, 1]
-                grid_ws = video_grid_thw[:, 2]
-                t_index = (torch.arange(grid_t) *
-                           second_per_grid_ts[video_idx] *
-                           tokens_per_second).long()
-                llm_pos_ids = cls._get_llm_pos_ids_for_vision(
-                    start_idx, video_idx, spatial_merge_size, t_index, grid_hs,
-                    grid_ws)
-                llm_pos_ids_list.append(llm_pos_ids)
-                vision_seqlen = video_grid_thw[video_idx].prod() // (
-                    spatial_merge_size**2)
-                new_src_item.extend([video_token_id] * vision_seqlen)
-                video_idx += 1
-            else:
-                # read audio from video
-                assert audio_seqlens is not None
-                audio_seqlen = audio_seqlens[audio_idx]
-                vision_seqlen = video_grid_thw[video_idx].prod() // (
-                    spatial_merge_size**2)
-                grid_t = video_grid_thw[video_idx][0]
-                grid_h = video_grid_thw[video_idx][1]
-                grid_w = video_grid_thw[video_idx][2]
-                grid_hs = video_grid_thw[:, 1]
-                grid_ws = video_grid_thw[:, 2]
-                t_ntoken_per_chunk = int(tokens_per_second * seconds_per_chunk)
-                t_index = (torch.arange(grid_t) *
-                           second_per_grid_ts[video_idx] *
-                           tokens_per_second).long()
-                t_index_split_chunk = cls._split_list_into_ranges(
-                    t_index, t_ntoken_per_chunk)
-                place_num = (((audio_seqlen - 1) // 2 + 1 - 2) // 2 + 1) + 2
-                pure_audio_len = place_num - 2
-                added_audio_len = 0
-                audio_llm_pos_ids_list: list[torch.Tensor] = []
-                for t_chunk in t_index_split_chunk:
-                    vision_ntoken_per_chunk = len(
-                        t_chunk) * grid_h * grid_w // (spatial_merge_size**2)
-                    new_src_item.extend([video_token_id] *
-                                        vision_ntoken_per_chunk)
-                    vision_llm_pos_ids_list = cls._get_llm_pos_ids_for_vision(
-                        start_idx, video_idx, spatial_merge_size, t_chunk,
-                        grid_hs, grid_ws).split(1, dim=1)
-                    llm_pos_ids_list.extend(vision_llm_pos_ids_list)
-                    new_src_item.extend(
-                        min(t_ntoken_per_chunk, pure_audio_len -
-                            added_audio_len) * [audio_token_id])
-                    audio_start_idx = start_idx if len(
-                        audio_llm_pos_ids_list
-                    ) == 0 else audio_llm_pos_ids_list[-1][0].item() + 1
-                    if min(t_ntoken_per_chunk,
-                           pure_audio_len - added_audio_len) > 0:
-                        audio_llm_pos_ids_list = (torch.arange(
-                            min(t_ntoken_per_chunk, pure_audio_len -
-                                added_audio_len)).expand(3, -1) +
-                                                  audio_start_idx).split(1,
-                                                                         dim=1)
-                    else:
-                        audio_llm_pos_ids_list = []
-                    added_audio_len += min(t_ntoken_per_chunk,
-                                           pure_audio_len - added_audio_len)
-                    llm_pos_ids_list.extend(audio_llm_pos_ids_list)
-                if added_audio_len < pure_audio_len:
-                    new_src_item.extend(
-                        (pure_audio_len - added_audio_len) * [audio_token_id])
-                    audio_llm_pos_ids_list = (
-                        torch.arange(pure_audio_len - added_audio_len).expand(
-                            3, -1) + llm_pos_ids_list[-1].max() + 1).split(
-                                1, dim=1)
-                    llm_pos_ids_list.extend(audio_llm_pos_ids_list)
-                audio_idx += 1
-                video_idx += 1
-            # move to the next token
-            idx += len(new_src_item) - new_src_item_len
-
-        llm_positions = torch.cat(llm_pos_ids_list, dim=1)
-        mrope_position_delta = torch.cat(llm_pos_ids_list,
-                                         dim=1).max() + 1 - len(src_item)
-        llm_positions = llm_positions[:, context_len:seq_len]
-
-        return llm_positions, mrope_position_delta
-
-    @staticmethod
-    def _get_llm_pos_ids_for_vision(
-        start_idx: int,
-        vision_idx: int,
-        spatial_merge_size: int,
-        t_index: list[int],
-        grid_hs: torch.Tensor,
-        grid_ws: torch.Tensor,
-    ) -> torch.Tensor:
-        llm_pos_ids_list = []
-        llm_grid_h = grid_hs[vision_idx] // spatial_merge_size
-        llm_grid_w = grid_ws[vision_idx] // spatial_merge_size
-        h_index = (torch.arange(llm_grid_h).view(1, -1, 1).expand(
-            len(t_index), -1, llm_grid_w).flatten())
-        w_index = (torch.arange(llm_grid_w).view(1, 1, -1).expand(
-            len(t_index), llm_grid_h, -1).flatten())
-        t_index_tensor = torch.Tensor(t_index).to(llm_grid_h.device).view(
-            -1, 1).expand(-1, llm_grid_h * llm_grid_w).long().flatten()
-        _llm_pos_ids = torch.stack([t_index_tensor, h_index, w_index])
-        llm_pos_ids_list.append(_llm_pos_ids + start_idx)
-        llm_pos_ids = torch.cat(llm_pos_ids_list, dim=1)
-        return llm_pos_ids
-
-    @staticmethod
-    def _split_list_into_ranges(lst: torch.Tensor,
-                                interval: int) -> list[list[int]]:
-        ranges: list[list[int]] = [[]
-                                   for _ in range((max(lst) // interval) + 1)]
-        for num in lst:
-            index = num // interval
-            ranges[index].append(num)
-        return ranges
-
-    @staticmethod
-    def get_next_input_positions(
-        mrope_position_delta: int,
-        context_len: int,
-        seq_len: int,
-    ) -> list[list[int]]:
-        return [
-            list(
-                range(context_len + mrope_position_delta,
-                      seq_len + mrope_position_delta)) for _ in range(3)
-        ]
-
-    @staticmethod
-    def get_next_input_positions_tensor(out: np.ndarray, out_offset: int,
-                                        mrope_position_delta: int,
-                                        context_len: int, num_new_tokens: int):
-
-        values = np.arange(mrope_position_delta + context_len,
-                           mrope_position_delta + context_len + num_new_tokens,
-                           dtype=out.dtype)
-        out[:, out_offset:out_offset + num_new_tokens] = values
-
-    @classmethod
-    def omni_get_updates_use_audio_in_video(
-        cls,
-        thinker_config: PretrainedConfig,
-        audio_len: int,
-        video_grid_thw: Union[list[int], torch.Tensor],
-        video_second_per_grid_t: float,
-    ) -> list[int]:
-        """Get video prompt updates when `use_audio_in_video` is True.
-
-        In this case, audio and vision update ids will be split into
-        chunks and interleaved (details in `_omni_get_input_positions_tensor`).
-
-        <|video_bos|><|VIDEO|><|video_eos|> =>
-        <|video_bos|><|audio_bos|>(... chunks ...)<|audio_eos|><|video_eos|>
-        """
-
-        audio_token_id = thinker_config.audio_token_index
-        video_token_id = thinker_config.video_token_index
-        audio_start_token_id = thinker_config.audio_start_token_id
-        audio_end_token_id = thinker_config.audio_end_token_id
-        seconds_per_chunk = thinker_config.seconds_per_chunk
-        spatial_merge_size = thinker_config.vision_config.spatial_merge_size
-        tokens_per_second = getattr(thinker_config.vision_config,
-                                    "tokens_per_second", 25)
-
-        grid_t = video_grid_thw[0]
-        grid_h = video_grid_thw[1]
-        grid_w = video_grid_thw[2]
-        t_ntoken_per_chunk = int(tokens_per_second * seconds_per_chunk)
-        t_index = (torch.arange(grid_t) * video_second_per_grid_t *
-                   tokens_per_second).long()
-        t_index_split_chunk = cls._split_list_into_ranges(
-            t_index, t_ntoken_per_chunk)
-
-        updates = [audio_start_token_id]
-        added_audio_len = 0
-        for t_chunk in t_index_split_chunk:
-            vision_ntoken_per_chunk = len(t_chunk) * grid_h * grid_w // (
-                spatial_merge_size**2)
-            updates.extend([video_token_id] * vision_ntoken_per_chunk)
-
-            audio_chunk_size = min(t_ntoken_per_chunk,
-                                   audio_len - added_audio_len)
-            updates.extend(audio_chunk_size * [audio_token_id])
-            added_audio_len += audio_chunk_size
-        if added_audio_len < audio_len:
-            updates.extend((audio_len - added_audio_len) * [audio_token_id])
-        updates.extend([audio_end_token_id])
-
-        return updates
-
-
-@CustomOp.register("dual_chunk_rotary_embedding")
-class DualChunkRotaryEmbedding(CustomOp):
-    """Rotary positional embedding for Dual Chunk Attention."""
-
-    def __init__(
-        self,
-        head_size: int,
-        rotary_dim: int,
-        max_position_embeddings: int,
-        base: float,
-        is_neox_style: bool,
-        dtype: torch.dtype,
-        chunk_size: int,
-        local_size: int,
-    ) -> None:
-        super().__init__()
-        self.head_size = head_size
-        self.rotary_dim = rotary_dim
-        self.max_position_embeddings = max_position_embeddings
-        self.base = base
-        self.is_neox_style = is_neox_style
-        self.chunk_size = chunk_size
-        self.local_size = local_size
-        self.dtype = dtype
-        self.device = torch.device(f"cuda:{torch.cuda.current_device()}")
-        (q_cache, qc_cache, k_cache, qc_no_clamp_cache,
-         q_inter_cache) = self._compute_cos_sin_cache()
-
-        self.register_buffer("cos_sin_q_cache", q_cache, persistent=False)
-        self.register_buffer("cos_sin_qc_cache", qc_cache, persistent=False)
-        self.register_buffer("cos_sin_k_cache", k_cache, persistent=False)
-        self.register_buffer("cos_sin_qc_no_clamp_cache",
-                             qc_no_clamp_cache,
-                             persistent=False)
-        self.register_buffer("cos_sin_q_inter_cache",
-                             q_inter_cache,
-                             persistent=False)
-
-    def _compute_inv_freq(self, base: float) -> torch.Tensor:
-        """Compute the inverse frequency."""
-        # NOTE(woosuk): The HF implementation uses `torch.arange(...).float()`.
-        # However, we use `torch.arange(..., dtype=torch.float)` instead to
-        # avoid numerical issues with large base values (e.g., 10000000).
-        # This may cause a slight numerical difference between the HF
-        # implementation and ours.
-        # NOTE(woosuk): To exactly match the HF implementation, we need to
-        # use CPU to compute the cache and then move it to GPU. However, we
-        # create the cache on GPU for faster initialization. This may cause
-        # a slight numerical difference between the HF implementation and ours.
-        inv_freq = 1.0 / (base**(torch.arange(
-            0, self.rotary_dim, 2, dtype=torch.float) / self.rotary_dim))
-        return inv_freq
-
-    def _compute_cos_sin_cache(self) -> torch.Tensor:
-        """Compute the cos and sin cache."""
-        inv_freq = self._compute_inv_freq(self.base)
-        chunk_len = self.chunk_size - self.local_size
-        q_t = torch.arange(chunk_len, dtype=torch.float)
-        qc_t = (torch.arange(chunk_len, dtype=torch.float) +
-                chunk_len).clamp(max=self.chunk_size)
-        k_t = torch.arange(self.max_position_embeddings,
-                           dtype=torch.float) % chunk_len
-
-        # count from chunk_len, no clamp(self.chunk_size) restriction
-        qc_no_clamp_t = torch.arange(chunk_len, dtype=torch.float) + chunk_len
-        # count from self.chunk_size for q_inter's rope
-        q_inter_t = torch.arange(chunk_len,
-                                 dtype=torch.float) + self.chunk_size
-
-        q_freqs = torch.outer(q_t, inv_freq)
-        qc_freqs = torch.outer(qc_t, inv_freq)
-        k_freqs = torch.outer(k_t, inv_freq)
-        qc_no_clamp_freqs = torch.outer(qc_no_clamp_t, inv_freq)
-        q_inter_freqs = torch.outer(q_inter_t, inv_freq)
-
-        q_cos = q_freqs.cos()
-        q_sin = q_freqs.sin()
-        qc_cos = qc_freqs.cos()
-        qc_sin = qc_freqs.sin()
-        k_cos = k_freqs.cos()
-        k_sin = k_freqs.sin()
-
-        qc_no_clamp_cos = qc_no_clamp_freqs.cos()
-        qc_no_clamp_sin = qc_no_clamp_freqs.sin()
-        q_inter_cos = q_inter_freqs.cos()
-        q_inter_sin = q_inter_freqs.sin()
-
-        q_cache = torch.cat((q_cos, q_sin), dim=-1).to(dtype=self.dtype,
-                                                       device=self.device)
-        qc_cache = torch.cat((qc_cos, qc_sin), dim=-1).to(dtype=self.dtype,
-                                                          device=self.device)
-        k_cache = torch.cat((k_cos, k_sin), dim=-1).to(dtype=self.dtype,
-                                                       device=self.device)
-        qc_no_clamp_cache = torch.cat((qc_no_clamp_cos, qc_no_clamp_sin),
-                                      dim=-1).to(dtype=self.dtype,
-                                                 device=self.device)
-        q_inter_cache = torch.cat((q_inter_cos, q_inter_sin),
-                                  dim=-1).to(dtype=self.dtype,
-                                             device=self.device)
-        return q_cache, qc_cache, k_cache, qc_no_clamp_cache, q_inter_cache
-
-    def forward(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: torch.Tensor,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        query = query.view(*query.shape[:-1], -1, self.head_size)
-        key = key.view(*key.shape[:-1], -1, self.head_size)
-        query_rot = query[..., :self.rotary_dim]
-        key_rot = key[..., :self.rotary_dim]
-        if self.rotary_dim < self.head_size:
-            query_pass = query[..., self.rotary_dim:]
-            key_pass = key[..., self.rotary_dim:]
-        else:
-            query_pass = None
-            key_pass = None
-
-        positions_with_offsets = (torch.add(positions, offsets)
-                                  if offsets is not None else positions)
-        key = self._apply_rotary_embedding(
-            self.cos_sin_k_cache[positions_with_offsets], key_rot, key_pass)
-        chunk_len = self.chunk_size - self.local_size
-        query = self._apply_rotary_embedding(
-            self.cos_sin_q_cache[positions_with_offsets % chunk_len],
-            query_rot, query_pass)
-        query_succ = self._apply_rotary_embedding(
-            self.cos_sin_qc_cache[positions_with_offsets % chunk_len],
-            query_rot, query_pass)
-        query_inter = self._apply_rotary_embedding(
-            self.cos_sin_qc_cache[chunk_len - 1].repeat(positions.shape[0], 1),
-            query_rot, query_pass)
-        query_succ_critical = self._apply_rotary_embedding(
-            self.cos_sin_qc_no_clamp_cache[positions_with_offsets % chunk_len],
-            query_rot, query_pass)
-        query_inter_critical = self._apply_rotary_embedding(
-            self.cos_sin_q_inter_cache[positions_with_offsets % chunk_len],
-            query_rot, query_pass)
-
-        # merge query into one tensor to simplify the interfaces
-        query = torch.cat((
-            query,
-            query_succ,
-            query_inter,
-            query_succ_critical,
-            query_inter_critical,
-        ),
-                          dim=-1)
-        return query, key
-
-    def _apply_rotary_embedding(self, cos_sin, hidden_rot, hidden_pass):
-        cos, sin = cos_sin.chunk(2, dim=-1)
-        if self.is_neox_style:
-            # NOTE(woosuk): Here we assume that the positions tensor has the
-            # shape [batch_size, seq_len].
-            cos = cos.repeat(1, 1, 2).unsqueeze(-2)
-            sin = sin.repeat(1, 1, 2).unsqueeze(-2)
-        else:
-            cos = cos.repeat_interleave(2, dim=-1).unsqueeze(-2)
-            sin = sin.repeat_interleave(2, dim=-1).unsqueeze(-2)
-        rotate_fn = _rotate_neox if self.is_neox_style else _rotate_gptj
-        hidden_rot = hidden_rot * cos + rotate_fn(hidden_rot) * sin
-
-        if self.rotary_dim < self.head_size:
-            hidden = torch.cat((hidden_rot, hidden_pass), dim=-1)
-        else:
-            hidden = hidden_rot
-        return hidden.flatten(-2).squeeze(0)
-
-    def extra_repr(self) -> str:
-        s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}"
-        s += f", max_position_embeddings={self.max_position_embeddings}"
-        s += f", base={self.base}, is_neox_style={self.is_neox_style}"
-        s += f", chunk_size={self.chunk_size}, local_size={self.local_size}"
-        return s
-
-
-_ROPE_DICT: dict[tuple, RotaryEmbedding] = {}
-
-
-def get_rope(
-    head_size: int,
-    rotary_dim: int,
-    max_position: int,
-    base: float,
-    is_neox_style: bool = True,
-    rope_scaling: Optional[dict[str, Any]] = None,
-    dtype: Optional[torch.dtype] = None,
-    partial_rotary_factor: float = 1.0,
-    dual_chunk_attention_config: Optional[dict[str, Any]] = None,
-) -> RotaryEmbedding:
-    if dtype is None:
-        dtype = torch.get_default_dtype()
-    if rope_scaling is not None:
-        # Transforms every value that is a list into a tuple for caching calls
-        rope_scaling_tuple = {
-            k: tuple(v) if isinstance(v, list) else v
-            for k, v in rope_scaling.items()
-        }
-        rope_scaling_args = tuple(rope_scaling_tuple.items())
-    else:
-        rope_scaling_args = None
-
-    if dual_chunk_attention_config is not None:
-        dual_chunk_attention_tuple = {
-            k: tuple(v) if isinstance(v, list) else v
-            for k, v in dual_chunk_attention_config.items()
-            if k != "sparse_attention_config"
-        }
-        dual_chunk_attention_args = tuple(dual_chunk_attention_tuple.items())
-    else:
-        dual_chunk_attention_args = None
-
-    if partial_rotary_factor < 1.0:
-        rotary_dim = int(rotary_dim * partial_rotary_factor)
-    key = (head_size, rotary_dim, max_position, base, is_neox_style,
-           rope_scaling_args, dual_chunk_attention_args, dtype)
-    if key in _ROPE_DICT:
-        return _ROPE_DICT[key]
-
-    if dual_chunk_attention_config is not None:
-        extra_kwargs = {
-            k: v
-            for k, v in dual_chunk_attention_config.items()
-            if k in ("chunk_size", "local_size")
-        }
-        rotary_emb = DualChunkRotaryEmbedding(head_size, rotary_dim,
-                                              max_position, base,
-                                              is_neox_style, dtype,
-                                              **extra_kwargs)
-    elif not rope_scaling:
-        rotary_emb = RotaryEmbedding(head_size, rotary_dim, max_position, base,
-                                     is_neox_style, dtype)
-    else:
-        scaling_type = rope_scaling["rope_type"]
-
-        if scaling_type == "llama3":
-            scaling_factor = rope_scaling["factor"]
-            low_freq_factor = rope_scaling["low_freq_factor"]
-            high_freq_factor = rope_scaling["high_freq_factor"]
-            original_max_position = rope_scaling[
-                "original_max_position_embeddings"]
-            rotary_emb = Llama3RotaryEmbedding(head_size, rotary_dim,
-                                               max_position, base,
-                                               is_neox_style, dtype,
-                                               scaling_factor, low_freq_factor,
-                                               high_freq_factor,
-                                               original_max_position)
-        elif scaling_type == "mllama4":
-            rotary_emb = Llama4VisionRotaryEmbedding(head_size, rotary_dim,
-                                                     max_position, base,
-                                                     is_neox_style, dtype)
-        elif scaling_type == "default":
-            if "mrope_section" in rope_scaling:
-                rotary_emb = MRotaryEmbedding(
-                    head_size,
-                    rotary_dim,
-                    max_position,
-                    base,
-                    is_neox_style,
-                    dtype,
-                    mrope_section=rope_scaling["mrope_section"],
-                )
-            else:
-                rotary_emb = RotaryEmbedding(
-                    head_size,
-                    rotary_dim,
-                    max_position,
-                    base,
-                    is_neox_style,
-                    dtype,
-                )
-        elif scaling_type == "linear":
-            scaling_factor = rope_scaling["factor"]
-            rotary_emb = LinearScalingRotaryEmbedding(head_size, rotary_dim,
-                                                      max_position, base,
-                                                      is_neox_style,
-                                                      scaling_factor, dtype)
-        elif scaling_type == "ntk":
-            scaling_factor = rope_scaling["factor"]
-            mixed_b = rope_scaling.get('mixed_b', None)
-            rotary_emb = NTKScalingRotaryEmbedding(head_size, rotary_dim,
-                                                   max_position, base,
-                                                   is_neox_style,
-                                                   scaling_factor, dtype,
-                                                   mixed_b)
-        elif scaling_type == "dynamic":
-            if "alpha" in rope_scaling:
-                scaling_alpha = rope_scaling["alpha"]
-                rotary_emb = DynamicNTKAlphaRotaryEmbedding(
-                    head_size, rotary_dim, max_position, base, is_neox_style,
-                    scaling_alpha, dtype)
-            elif "factor" in rope_scaling:
-                scaling_factor = rope_scaling["factor"]
-                rotary_emb = DynamicNTKScalingRotaryEmbedding(
-                    head_size, rotary_dim, max_position, base, is_neox_style,
-                    scaling_factor, dtype)
-            else:
-                raise ValueError("Dynamic rope scaling must contain either "
-                                 "'alpha' or 'factor' field")
-        elif scaling_type == "yarn":
-            scaling_factor = rope_scaling["factor"]
-            original_max_position = rope_scaling[
-                "original_max_position_embeddings"]
-            extra_kwargs = {
-                k: v
-                for k, v in rope_scaling.items()
-                if k in ("extrapolation_factor", "attn_factor", "beta_fast",
-                         "beta_slow")
-            }
-            rotary_emb = YaRNScalingRotaryEmbedding(head_size, rotary_dim,
-                                                    original_max_position,
-                                                    base, is_neox_style,
-                                                    scaling_factor, dtype,
-                                                    **extra_kwargs)
-        elif scaling_type == "deepseek_yarn":
-            scaling_factor = rope_scaling["factor"]
-            original_max_position = rope_scaling[
-                "original_max_position_embeddings"]
-            # assert max_position == original_max_position * scaling_factor
-            extra_kwargs = {
-                k: v
-                for k, v in rope_scaling.items()
-                if k in ("extrapolation_factor", "attn_factor", "beta_fast",
-                         "beta_slow", "mscale", "mscale_all_dim")
-            }
-            rotary_emb = DeepseekScalingRotaryEmbedding(
-                head_size, rotary_dim, original_max_position, base,
-                is_neox_style, scaling_factor, dtype, **extra_kwargs)
-        elif scaling_type == "longrope":
-            short_factor = rope_scaling["short_factor"]
-            long_factor = rope_scaling["long_factor"]
-            original_max_position = rope_scaling[
-                "original_max_position_embeddings"]
-            extra_kwargs = {
-                k: v
-                for k, v in rope_scaling.items()
-                if k in ("short_mscale", "long_mscale")
-            }
-            rotary_emb = Phi3LongRoPEScaledRotaryEmbedding(
-                head_size, rotary_dim, max_position, original_max_position,
-                base, is_neox_style, dtype, short_factor, long_factor,
-                **extra_kwargs)
-        else:
-            raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
-    _ROPE_DICT[key] = rotary_emb
-    return rotary_emb
diff --git a/vllm/model_executor/layers/rotary_embedding/__init__.py b/vllm/model_executor/layers/rotary_embedding/__init__.py
new file mode 100644
index 0000000000000..564f9a5c00750
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/__init__.py
@@ -0,0 +1,190 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Rotary Positional Embeddings."""
+from typing import Any, Optional
+
+import torch
+
+from .base import RotaryEmbedding
+from .deepseek_scaling_rope import DeepseekScalingRotaryEmbedding
+from .dual_chunk_rope import DualChunkRotaryEmbedding
+from .dynamic_ntk_alpha_rope import DynamicNTKAlphaRotaryEmbedding
+from .dynamic_ntk_scaling_rope import DynamicNTKScalingRotaryEmbedding
+from .linear_scaling_rope import LinearScalingRotaryEmbedding
+from .llama3_rope import Llama3RotaryEmbedding
+from .llama4_vision_rope import Llama4VisionRotaryEmbedding
+from .mrope import MRotaryEmbedding
+from .ntk_scaling_rope import NTKScalingRotaryEmbedding
+from .phi3_long_rope_scaled_rope import Phi3LongRoPEScaledRotaryEmbedding
+from .yarn_scaling_rope import YaRNScalingRotaryEmbedding
+
+_ROPE_DICT: dict[tuple, RotaryEmbedding] = {}
+
+
+def get_rope(
+    head_size: int,
+    rotary_dim: int,
+    max_position: int,
+    base: float,
+    is_neox_style: bool = True,
+    rope_scaling: Optional[dict[str, Any]] = None,
+    dtype: Optional[torch.dtype] = None,
+    partial_rotary_factor: float = 1.0,
+    dual_chunk_attention_config: Optional[dict[str, Any]] = None,
+) -> RotaryEmbedding:
+    if dtype is None:
+        dtype = torch.get_default_dtype()
+    if rope_scaling is not None:
+        # Transforms every value that is a list into a tuple for caching calls
+        rope_scaling_tuple = {
+            k: tuple(v) if isinstance(v, list) else v
+            for k, v in rope_scaling.items()
+        }
+        rope_scaling_args = tuple(rope_scaling_tuple.items())
+    else:
+        rope_scaling_args = None
+
+    if dual_chunk_attention_config is not None:
+        dual_chunk_attention_tuple = {
+            k: tuple(v) if isinstance(v, list) else v
+            for k, v in dual_chunk_attention_config.items()
+            if k != "sparse_attention_config"
+        }
+        dual_chunk_attention_args = tuple(dual_chunk_attention_tuple.items())
+    else:
+        dual_chunk_attention_args = None
+
+    if partial_rotary_factor < 1.0:
+        rotary_dim = int(rotary_dim * partial_rotary_factor)
+    key = (head_size, rotary_dim, max_position, base, is_neox_style,
+           rope_scaling_args, dual_chunk_attention_args, dtype)
+    if key in _ROPE_DICT:
+        return _ROPE_DICT[key]
+
+    if dual_chunk_attention_config is not None:
+        extra_kwargs = {
+            k: v
+            for k, v in dual_chunk_attention_config.items()
+            if k in ("chunk_size", "local_size")
+        }
+        rotary_emb = DualChunkRotaryEmbedding(head_size, rotary_dim,
+                                              max_position, base,
+                                              is_neox_style, dtype,
+                                              **extra_kwargs)
+    elif not rope_scaling:
+        rotary_emb = RotaryEmbedding(head_size, rotary_dim, max_position, base,
+                                     is_neox_style, dtype)
+    else:
+        scaling_type = rope_scaling["rope_type"]
+
+        if scaling_type == "llama3":
+            scaling_factor = rope_scaling["factor"]
+            low_freq_factor = rope_scaling["low_freq_factor"]
+            high_freq_factor = rope_scaling["high_freq_factor"]
+            original_max_position = rope_scaling[
+                "original_max_position_embeddings"]
+            rotary_emb = Llama3RotaryEmbedding(head_size, rotary_dim,
+                                               max_position, base,
+                                               is_neox_style, dtype,
+                                               scaling_factor, low_freq_factor,
+                                               high_freq_factor,
+                                               original_max_position)
+        elif scaling_type == "mllama4":
+            rotary_emb = Llama4VisionRotaryEmbedding(head_size, rotary_dim,
+                                                     max_position, base,
+                                                     is_neox_style, dtype)
+        elif scaling_type == "default":
+            if "mrope_section" in rope_scaling:
+                rotary_emb = MRotaryEmbedding(
+                    head_size,
+                    rotary_dim,
+                    max_position,
+                    base,
+                    is_neox_style,
+                    dtype,
+                    mrope_section=rope_scaling["mrope_section"],
+                )
+            else:
+                rotary_emb = RotaryEmbedding(
+                    head_size,
+                    rotary_dim,
+                    max_position,
+                    base,
+                    is_neox_style,
+                    dtype,
+                )
+        elif scaling_type == "linear":
+            scaling_factor = rope_scaling["factor"]
+            rotary_emb = LinearScalingRotaryEmbedding(head_size, rotary_dim,
+                                                      max_position, base,
+                                                      is_neox_style,
+                                                      scaling_factor, dtype)
+        elif scaling_type == "ntk":
+            scaling_factor = rope_scaling["factor"]
+            mixed_b = rope_scaling.get('mixed_b', None)
+            rotary_emb = NTKScalingRotaryEmbedding(head_size, rotary_dim,
+                                                   max_position, base,
+                                                   is_neox_style,
+                                                   scaling_factor, dtype,
+                                                   mixed_b)
+        elif scaling_type == "dynamic":
+            if "alpha" in rope_scaling:
+                scaling_alpha = rope_scaling["alpha"]
+                rotary_emb = DynamicNTKAlphaRotaryEmbedding(
+                    head_size, rotary_dim, max_position, base, is_neox_style,
+                    scaling_alpha, dtype)
+            elif "factor" in rope_scaling:
+                scaling_factor = rope_scaling["factor"]
+                rotary_emb = DynamicNTKScalingRotaryEmbedding(
+                    head_size, rotary_dim, max_position, base, is_neox_style,
+                    scaling_factor, dtype)
+            else:
+                raise ValueError("Dynamic rope scaling must contain either "
+                                 "'alpha' or 'factor' field")
+        elif scaling_type == "yarn":
+            scaling_factor = rope_scaling["factor"]
+            original_max_position = rope_scaling[
+                "original_max_position_embeddings"]
+            extra_kwargs = {
+                k: v
+                for k, v in rope_scaling.items()
+                if k in ("extrapolation_factor", "attn_factor", "beta_fast",
+                         "beta_slow")
+            }
+            rotary_emb = YaRNScalingRotaryEmbedding(head_size, rotary_dim,
+                                                    original_max_position,
+                                                    base, is_neox_style,
+                                                    scaling_factor, dtype,
+                                                    **extra_kwargs)
+        elif scaling_type == "deepseek_yarn":
+            scaling_factor = rope_scaling["factor"]
+            original_max_position = rope_scaling[
+                "original_max_position_embeddings"]
+            # assert max_position == original_max_position * scaling_factor
+            extra_kwargs = {
+                k: v
+                for k, v in rope_scaling.items()
+                if k in ("extrapolation_factor", "attn_factor", "beta_fast",
+                         "beta_slow", "mscale", "mscale_all_dim")
+            }
+            rotary_emb = DeepseekScalingRotaryEmbedding(
+                head_size, rotary_dim, original_max_position, base,
+                is_neox_style, scaling_factor, dtype, **extra_kwargs)
+        elif scaling_type == "longrope":
+            short_factor = rope_scaling["short_factor"]
+            long_factor = rope_scaling["long_factor"]
+            original_max_position = rope_scaling[
+                "original_max_position_embeddings"]
+            extra_kwargs = {
+                k: v
+                for k, v in rope_scaling.items()
+                if k in ("short_mscale", "long_mscale")
+            }
+            rotary_emb = Phi3LongRoPEScaledRotaryEmbedding(
+                head_size, rotary_dim, max_position, original_max_position,
+                base, is_neox_style, dtype, short_factor, long_factor,
+                **extra_kwargs)
+        else:
+            raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
+    _ROPE_DICT[key] = rotary_emb
+    return rotary_emb
diff --git a/vllm/model_executor/layers/rotary_embedding/base.py b/vllm/model_executor/layers/rotary_embedding/base.py
new file mode 100644
index 0000000000000..10fce857a8ae2
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/base.py
@@ -0,0 +1,237 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Rotary Positional Embeddings Base Class."""
+from typing import Optional
+
+import torch
+
+from vllm.model_executor.custom_op import CustomOp
+
+from .common import apply_rotary_emb_dispatch, apply_rotary_emb_torch
+
+
+@CustomOp.register("rotary_embedding")
+class RotaryEmbedding(CustomOp):
+    """Original rotary positional embedding."""
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        dtype: torch.dtype,
+    ) -> None:
+        super().__init__()
+        self.head_size = head_size
+        self.rotary_dim = rotary_dim
+        self.max_position_embeddings = max_position_embeddings
+        self.base = base
+        self.is_neox_style = is_neox_style
+        self.dtype = dtype
+
+        cache = self._compute_cos_sin_cache()
+        cache = cache.to(dtype)
+        self.cos_sin_cache: torch.Tensor
+        self.register_buffer("cos_sin_cache", cache, persistent=False)
+
+    def _compute_inv_freq(self, base: float) -> torch.Tensor:
+        """Compute the inverse frequency."""
+        # NOTE(woosuk): To exactly match the HF implementation, we need to
+        # use CPU to compute the cache and then move it to GPU. However, we
+        # create the cache on GPU for faster initialization. This may cause
+        # a slight numerical difference between the HF implementation and ours.
+        inv_freq = 1.0 / (base**(torch.arange(
+            0, self.rotary_dim, 2, dtype=torch.float) / self.rotary_dim))
+        return inv_freq
+
+    def _compute_cos_sin_cache(self) -> torch.Tensor:
+        """Compute the cos and sin cache."""
+        inv_freq = self._compute_inv_freq(self.base)
+        t = torch.arange(self.max_position_embeddings, dtype=torch.float)
+
+        freqs = torch.einsum("i,j -> ij", t, inv_freq)
+        cos = freqs.cos()
+        sin = freqs.sin()
+        cache = torch.cat((cos, sin), dim=-1)
+        return cache
+
+    def forward_native(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: Optional[torch.Tensor] = None,
+        offsets: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
+        """A PyTorch-native implementation of forward()."""
+        if offsets is not None:
+            positions = positions + offsets
+        positions = positions.flatten()
+        num_tokens = positions.shape[0]
+        cos_sin = self.cos_sin_cache.index_select(0, positions)
+        cos, sin = cos_sin.chunk(2, dim=-1)
+
+        query_shape = query.shape
+        query = query.view(num_tokens, -1, self.head_size)
+        query_rot = query[..., :self.rotary_dim]
+        query_pass = query[..., self.rotary_dim:]
+        query_rot = apply_rotary_emb_torch(query_rot, cos, sin,
+                                           self.is_neox_style)
+        query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
+
+        # key may be None in some cases, e.g. cross-layer KV sharing
+        if key is not None:
+            key_shape = key.shape
+            key = key.view(num_tokens, -1, self.head_size)
+            key_rot = key[..., :self.rotary_dim]
+            key_pass = key[..., self.rotary_dim:]
+            key_rot = apply_rotary_emb_torch(key_rot, cos, sin,
+                                             self.is_neox_style)
+            key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
+        return query, key
+
+    def forward_cuda(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: Optional[torch.Tensor] = None,
+        offsets: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
+        from vllm import _custom_ops as ops
+
+        # __setattr__ in nn.Module (called by `self.cos_sin_cache = ...`)
+        # is expensive, so avoid calling it if possible
+        if self.cos_sin_cache.device != query.device or \
+            self.cos_sin_cache.dtype != query.dtype:
+            self.cos_sin_cache = self.cos_sin_cache.to(query.device,
+                                                       dtype=query.dtype)
+
+        # ops.rotary_embedding()/batched_rotary_embedding()
+        # are in-place operations that update the query and key tensors.
+        if offsets is not None:
+            ops.batched_rotary_embedding(positions, query, key, self.head_size,
+                                         self.cos_sin_cache,
+                                         self.is_neox_style, self.rotary_dim,
+                                         offsets)
+        else:
+            ops.rotary_embedding(positions, query, key, self.head_size,
+                                 self.cos_sin_cache, self.is_neox_style)
+        return query, key
+
+    def forward_xpu(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: Optional[torch.Tensor] = None,
+        offsets: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
+        from vllm._ipex_ops import ipex_ops as ops
+
+        self.cos_sin_cache = self.cos_sin_cache.to(positions.device,
+                                                   dtype=query.dtype)
+        # ops.rotary_embedding()/batched_rotary_embedding()
+        # are in-place operations that update the query and key tensors.
+        if key is None:
+            # XPU kernel doesn't support key=None so fall back to native impl
+            # TODO(sarckk): add support for optional key in
+            # ipex.llm.functional.rotary_embedding_batched
+            return self.forward_native(positions, query, key, offsets)
+        else:
+            if offsets is not None:
+                ops.batched_rotary_embedding(positions, query, key,
+                                             self.head_size,
+                                             self.cos_sin_cache,
+                                             self.is_neox_style,
+                                             self.rotary_dim, offsets)
+            else:
+                ops.rotary_embedding(positions, query, key, self.head_size,
+                                     self.cos_sin_cache, self.is_neox_style)
+        return query, key
+
+    def forward_neuron(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: Optional[torch.Tensor] = None,
+        offsets: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
+
+        def _apply_rotary_emb_neuron(
+            x: torch.Tensor,
+            cos: torch.Tensor,
+            sin: torch.Tensor,
+            is_neox_style: bool,
+        ) -> torch.Tensor:
+            cos = cos.unsqueeze(-2).to(x.dtype)
+            sin = sin.unsqueeze(-2).to(x.dtype)
+            if is_neox_style:
+                x1, x2 = torch.chunk(x, 2, dim=-1)
+            else:
+                # x1 = x[..., ::2]
+
+                # x2 = x[..., 1::2]
+                d = x.shape[-1] // 2
+                x_reshaped = x.view(-1, x.shape[-1])
+                x1 = x_reshaped[:, ::2].view(*x.shape[:-1], d)
+                x2 = x_reshaped[:, 1::2].view(*x.shape[:-1], d)
+            o1 = x1 * cos - x2 * sin
+            o2 = x2 * cos + x1 * sin
+            if is_neox_style:
+                return torch.cat((o1, o2), dim=-1)
+            else:
+                return torch.stack((o1, o2), dim=-1).flatten(-2)
+
+        if offsets is not None:
+            positions = positions + offsets
+
+        self.cos_sin_cache = self.cos_sin_cache.to(query.device,
+                                                   dtype=query.dtype)
+
+        positions = positions.flatten()
+        num_tokens = positions.shape[0]
+        cos_sin = self.cos_sin_cache.index_select(0, positions)
+        cos, sin = cos_sin.chunk(2, dim=-1)
+
+        query_shape = query.shape
+        query = query.view(num_tokens, -1, self.head_size)
+        if key is not None:
+            key_shape = key.shape
+            key = key.view(num_tokens, -1, self.head_size)
+
+        if self.rotary_dim == self.head_size:
+            query = apply_rotary_emb_dispatch(query, cos, sin,
+                                              self.is_neox_style)
+            query = query.reshape(query_shape)
+            if key is not None:
+                key = apply_rotary_emb_dispatch(key, cos, sin,
+                                                self.is_neox_style)
+                key = key.reshape(key_shape)
+        else:
+            head_size = query.shape[-1]
+            query_reshaped = query.view(-1, head_size)
+            query_pass = query_reshaped[:, self.rotary_dim:].view(
+                *query.shape[:-1], head_size - self.rotary_dim)
+            query_rot = query_reshaped[:, :self.rotary_dim].view(
+                *query.shape[:-1], self.rotary_dim)
+            query_rot = _apply_rotary_emb_neuron(query_rot, cos, sin,
+                                                 self.is_neox_style)
+            query = torch.cat((query_rot, query_pass),
+                              dim=-1).reshape(query_shape)
+
+            if key is not None:
+                key_reshaped = key.view(-1, head_size)
+                key_pass = key_reshaped[:, self.rotary_dim:].view(
+                    *key.shape[:-1], head_size - self.rotary_dim)
+                key_rot = key_reshaped[:, :self.rotary_dim].view(
+                    *key.shape[:-1], self.rotary_dim)
+                key_rot = _apply_rotary_emb_neuron(key_rot, cos, sin,
+                                                   self.is_neox_style)
+                key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
+        return query, key
+
+    def extra_repr(self) -> str:
+        s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}"
+        s += f", max_position_embeddings={self.max_position_embeddings}"
+        s += f", base={self.base}, is_neox_style={self.is_neox_style}"
+        return s
diff --git a/vllm/model_executor/layers/rotary_embedding/common.py b/vllm/model_executor/layers/rotary_embedding/common.py
new file mode 100644
index 0000000000000..8d821bea19e3e
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/common.py
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import math
+
+import torch
+
+from vllm.platforms import current_platform
+
+if current_platform.is_cuda():
+    from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb
+
+
+# common functions
+def rotate_neox(x: torch.Tensor) -> torch.Tensor:
+    half = x.shape[-1] // 2  # split point between the two halves
+    first, second = x[..., :half], x[..., half:]
+    return torch.cat((-second, first), dim=-1)
+
+
+def rotate_gptj(x: torch.Tensor) -> torch.Tensor:
+    # Pair up (even, odd) lanes and map each pair (a, b) to (-b, a).
+    even, odd = x[..., ::2], x[..., 1::2]
+    pairs = torch.stack((-odd, even), dim=-1)
+    return pairs.flatten(-2)
+
+
+def apply_rotary_emb_torch(
+    x: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+    is_neox_style: bool,
+) -> torch.Tensor:
+    """Rotate `x` by `cos`/`sin` in plain PyTorch (Neox or GPT-J layout)."""
+    # Insert a broadcast axis before the last dim and match x's dtype.
+    cos = cos.unsqueeze(-2).to(x.dtype)
+    sin = sin.unsqueeze(-2).to(x.dtype)
+    if is_neox_style:
+        # Neox: the two rotation halves are contiguous chunks.
+        x1, x2 = torch.chunk(x, 2, dim=-1)
+        return torch.cat((x1 * cos - x2 * sin, x2 * cos + x1 * sin), dim=-1)
+    # GPT-J: the two rotation halves are interleaved (even/odd lanes).
+    x1, x2 = x[..., ::2], x[..., 1::2]
+    rotated = torch.stack((x1 * cos - x2 * sin, x2 * cos + x1 * sin),
+                          dim=-1)
+    return rotated.flatten(-2)
+
+
+def apply_rotary_emb_dispatch(x: torch.Tensor, cos: torch.Tensor,
+                              sin: torch.Tensor,
+                              is_neox_style: bool) -> torch.Tensor:
+    """Apply rotary embedding with the implementation for this platform.
+
+    Args:
+        x: [num_tokens, num_heads, head_size]
+        cos: [num_tokens, head_size // 2]
+        sin: [num_tokens, head_size // 2]
+        is_neox_style: True for Neox-style, False for GPT-J-style rotation.
+    """
+    if not current_platform.is_cuda():
+        return apply_rotary_emb_torch(x, cos, sin, is_neox_style)
+    # CUDA kernel: add a leading axis for the call and drop it afterwards.
+    return apply_rotary_emb(x.unsqueeze(0), cos, sin,
+                            not is_neox_style).squeeze(0)
+
+
+# yarn functions
+# Inverse dim formula to find dim based on number of rotations
+def yarn_find_correction_dim(num_rotations: int,
+                             dim: int,
+                             base: float = 10000,
+                             max_position_embeddings: int = 2048) -> float:
+    # dim * ln(max_pos / (2 * pi * num_rotations)) / (2 * ln(base))
+    ratio = max_position_embeddings / (num_rotations * 2 * math.pi)
+    return (dim * math.log(ratio)) / (2 * math.log(base))
+
+
+# Find dim range bounds based on rotations
+def yarn_find_correction_range(
+        low_rot: int,
+        high_rot: int,
+        dim: int,
+        base: float = 10000,
+        max_position_embeddings: int = 2048) -> tuple[int, int]:
+    # Round outward (floor low / ceil high), then clamp into [0, dim - 1].
+    lower, upper = (yarn_find_correction_dim(rot, dim, base,
+                                             max_position_embeddings)
+                    for rot in (low_rot, high_rot))
+    return max(math.floor(lower), 0), min(math.ceil(upper), dim - 1)
+
+
+def yarn_linear_ramp_mask(low: float, high: float, dim: int,
+                          dtype: torch.dtype) -> torch.Tensor:
+    # Nudge `high` when the bounds coincide to avoid dividing by zero.
+    upper = high + 0.001 if low == high else high
+    # Linear ramp over the indices 0..dim-1, saturated to [0, 1].
+    steps = torch.arange(dim, dtype=dtype)
+    ramp = (steps - low) / (upper - low)
+    return torch.clamp(ramp, 0, 1)
+
+
+def yarn_get_mscale(scale: float = 1) -> float:
+    """Return 1.0 for scale <= 1, else 0.1 * ln(scale) + 1.0."""
+    # No correction is applied unless scale exceeds 1.
+    return 1.0 if scale <= 1 else 0.1 * math.log(scale) + 1.0
diff --git a/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py
new file mode 100644
index 0000000000000..cd888b733426b
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import math
+from typing import Optional
+
+import torch
+
+from vllm.platforms import current_platform
+
+from .base import RotaryEmbedding
+from .common import (rotate_gptj, rotate_neox, yarn_find_correction_range,
+                     yarn_linear_ramp_mask)
+
+
+def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float:
+    """Return 1.0 for scale <= 1, else 0.1 * mscale * ln(scale) + 1.0."""
+    # Conditional expression keeps the two cases side by side.
+    return 1.0 if scale <= 1 else 0.1 * mscale * math.log(scale) + 1.0
+
+
+class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
+    """RotaryEmbedding extended with YaRN method.
+
+    Credits to Peng et al. github.com/jquesnelle/yarn
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        scaling_factor: float,
+        dtype: torch.dtype,
+        *,
+        extrapolation_factor: float = 1,
+        attn_factor: float = 1,
+        beta_fast: int = 32,
+        beta_slow: int = 1,
+        mscale: float = 1,
+        mscale_all_dim: float = 0,
+    ) -> None:
+        self.scaling_factor = scaling_factor
+        self.extrapolation_factor = extrapolation_factor
+        self.attn_factor = attn_factor
+        self.beta_fast = beta_fast
+        self.beta_slow = beta_slow
+        # Magnitude scale: ratio of two mscale terms, times attn_factor.
+        self.mscale = float(
+            yarn_get_mscale(self.scaling_factor, float(mscale)) /
+            yarn_get_mscale(self.scaling_factor, float(mscale_all_dim)) *
+            attn_factor)
+        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
+                         is_neox_style, dtype)
+
+    def _compute_inv_freq(self, scaling_factor: float) -> torch.Tensor:
+        pos_freqs = self.base**(
+            torch.arange(0,
+                         self.rotary_dim,
+                         2,
+                         dtype=torch.float,
+                         device=current_platform.device_type) /
+            self.rotary_dim)
+        inv_freq_extrapolation = 1.0 / pos_freqs  # unscaled frequencies
+        inv_freq_interpolation = 1.0 / (scaling_factor * pos_freqs)
+
+        low, high = yarn_find_correction_range(self.beta_fast, self.beta_slow,
+                                               self.rotary_dim, self.base,
+                                               self.max_position_embeddings)
+        # Get n-d rotational scaling corrected for extrapolation
+        inv_freq_mask = (1 - yarn_linear_ramp_mask(
+            low, high, self.rotary_dim // 2,
+            dtype=torch.float)) * self.extrapolation_factor
+        inv_freq = inv_freq_interpolation * (
+            1 - inv_freq_mask) + inv_freq_extrapolation * inv_freq_mask
+        return inv_freq
+
+    def _compute_cos_sin_cache(self) -> torch.Tensor:
+        inv_freq = self._compute_inv_freq(self.scaling_factor)
+        t = torch.arange(self.max_position_embeddings * self.scaling_factor,
+                         device=current_platform.device_type,
+                         dtype=torch.float32)
+        freqs = torch.einsum("i,j -> ij", t, inv_freq)
+        cos = (freqs.cos() * self.mscale)
+        sin = (freqs.sin() * self.mscale)
+        cache = torch.cat((cos, sin), dim=-1)  # [cos | sin] per position
+        return cache
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: Optional[torch.Tensor] = None,
+        offsets: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
+        """Rotate query and key in pure PyTorch; `key` must not be None."""
+        assert key is not None  # key is required by this variant
+        query_rot = query[..., :self.rotary_dim]
+        key_rot = key[..., :self.rotary_dim]
+        if self.rotary_dim < self.head_size:
+            query_pass = query[..., self.rotary_dim:]
+            key_pass = key[..., self.rotary_dim:]
+
+        if self.cos_sin_cache.device != positions.device:
+            self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(
+                positions.device)
+        cos_sin = self.cos_sin_cache[torch.add(positions, offsets)
+                                     if offsets is not None else positions]
+        cos, sin = cos_sin.chunk(2, dim=-1)  # split the [cos | sin] halves
+        if self.is_neox_style:
+            # NOTE(woosuk): Here we assume that the positions tensor has the
+            # shape [batch_size, seq_len].
+            cos = cos.repeat(1, 1, 2).unsqueeze(-2)
+            sin = sin.repeat(1, 1, 2).unsqueeze(-2)
+        else:
+            cos = cos.repeat_interleave(2, dim=-1).unsqueeze(-2)
+            sin = sin.repeat_interleave(2, dim=-1).unsqueeze(-2)
+
+        rotate_fn = rotate_neox if self.is_neox_style else rotate_gptj
+        query_rot = query_rot * cos + rotate_fn(query_rot) * sin
+        key_rot = key_rot * cos + rotate_fn(key_rot) * sin
+
+        if self.rotary_dim < self.head_size:
+            query = torch.cat((query_rot, query_pass), dim=-1)
+            key = torch.cat((key_rot, key_pass), dim=-1)
+        else:
+            query = query_rot
+            key = key_rot
+        return query, key
diff --git a/vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py b/vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py
new file mode 100644
index 0000000000000..3d8da0fa9d8f5
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py
@@ -0,0 +1,188 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Optional
+
+import torch
+
+from vllm.model_executor.custom_op import CustomOp
+
+from .common import rotate_gptj, rotate_neox
+
+
+@CustomOp.register("dual_chunk_rotary_embedding")
+class DualChunkRotaryEmbedding(CustomOp):
+    """Rotary positional embedding for Dual Chunk Attention."""
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        dtype: torch.dtype,
+        chunk_size: int,
+        local_size: int,
+    ) -> None:
+        super().__init__()
+        self.head_size = head_size
+        self.rotary_dim = rotary_dim
+        self.max_position_embeddings = max_position_embeddings
+        self.base = base
+        self.is_neox_style = is_neox_style
+        self.chunk_size = chunk_size
+        self.local_size = local_size
+        self.dtype = dtype
+        self.device = torch.device(f"cuda:{torch.cuda.current_device()}")
+        (q_cache, qc_cache, k_cache, qc_no_clamp_cache,
+         q_inter_cache) = self._compute_cos_sin_cache()
+
+        self.register_buffer("cos_sin_q_cache", q_cache, persistent=False)
+        self.register_buffer("cos_sin_qc_cache", qc_cache, persistent=False)
+        self.register_buffer("cos_sin_k_cache", k_cache, persistent=False)
+        self.register_buffer("cos_sin_qc_no_clamp_cache",
+                             qc_no_clamp_cache,
+                             persistent=False)
+        self.register_buffer("cos_sin_q_inter_cache",
+                             q_inter_cache,
+                             persistent=False)
+
+    def _compute_inv_freq(self, base: float) -> torch.Tensor:
+        """Compute the inverse frequency."""
+        # NOTE(woosuk): The HF implementation uses `torch.arange(...).float()`.
+        # However, we use `torch.arange(..., dtype=torch.float)` instead to
+        # avoid numerical issues with large base values (e.g., 10000000).
+        # This may cause a slight numerical difference between the HF
+        # implementation and ours.
+        # NOTE(woosuk): To exactly match the HF implementation, we need to
+        # use CPU to compute the cache and then move it to GPU. However, we
+        # create the cache on GPU for faster initialization. This may cause
+        # a slight numerical difference between the HF implementation and ours.
+        inv_freq = 1.0 / (base**(torch.arange(
+            0, self.rotary_dim, 2, dtype=torch.float) / self.rotary_dim))
+        return inv_freq
+
+    def _compute_cos_sin_cache(self) -> tuple[torch.Tensor, ...]:
+        """Build the q, qc, k, qc_no_clamp and q_inter cos/sin caches."""
+        inv_freq = self._compute_inv_freq(self.base)
+        chunk_len = self.chunk_size - self.local_size
+        q_t = torch.arange(chunk_len, dtype=torch.float)
+        qc_t = (torch.arange(chunk_len, dtype=torch.float) +
+                chunk_len).clamp(max=self.chunk_size)
+        k_t = torch.arange(self.max_position_embeddings,
+                           dtype=torch.float) % chunk_len
+
+        # count from chunk_len, no clamp(self.chunk_size) restriction
+        qc_no_clamp_t = torch.arange(chunk_len, dtype=torch.float) + chunk_len
+        # count from self.chunk_size for q_inter's rope
+        q_inter_t = torch.arange(chunk_len,
+                                 dtype=torch.float) + self.chunk_size
+
+        q_freqs = torch.outer(q_t, inv_freq)
+        qc_freqs = torch.outer(qc_t, inv_freq)
+        k_freqs = torch.outer(k_t, inv_freq)
+        qc_no_clamp_freqs = torch.outer(qc_no_clamp_t, inv_freq)
+        q_inter_freqs = torch.outer(q_inter_t, inv_freq)
+
+        q_cos = q_freqs.cos()
+        q_sin = q_freqs.sin()
+        qc_cos = qc_freqs.cos()
+        qc_sin = qc_freqs.sin()
+        k_cos = k_freqs.cos()
+        k_sin = k_freqs.sin()
+
+        qc_no_clamp_cos = qc_no_clamp_freqs.cos()
+        qc_no_clamp_sin = qc_no_clamp_freqs.sin()
+        q_inter_cos = q_inter_freqs.cos()
+        q_inter_sin = q_inter_freqs.sin()
+
+        q_cache = torch.cat((q_cos, q_sin), dim=-1).to(dtype=self.dtype,
+                                                       device=self.device)
+        qc_cache = torch.cat((qc_cos, qc_sin), dim=-1).to(dtype=self.dtype,
+                                                          device=self.device)
+        k_cache = torch.cat((k_cos, k_sin), dim=-1).to(dtype=self.dtype,
+                                                       device=self.device)
+        qc_no_clamp_cache = torch.cat((qc_no_clamp_cos, qc_no_clamp_sin),
+                                      dim=-1).to(dtype=self.dtype,
+                                                 device=self.device)
+        q_inter_cache = torch.cat((q_inter_cos, q_inter_sin),
+                                  dim=-1).to(dtype=self.dtype,
+                                             device=self.device)
+        return q_cache, qc_cache, k_cache, qc_no_clamp_cache, q_inter_cache
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        offsets: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        query = query.view(*query.shape[:-1], -1, self.head_size)
+        key = key.view(*key.shape[:-1], -1, self.head_size)
+        query_rot = query[..., :self.rotary_dim]
+        key_rot = key[..., :self.rotary_dim]
+        if self.rotary_dim < self.head_size:
+            query_pass = query[..., self.rotary_dim:]
+            key_pass = key[..., self.rotary_dim:]
+        else:
+            query_pass = None
+            key_pass = None
+
+        positions_with_offsets = (torch.add(positions, offsets)
+                                  if offsets is not None else positions)
+        key = self._apply_rotary_embedding(
+            self.cos_sin_k_cache[positions_with_offsets], key_rot, key_pass)
+        chunk_len = self.chunk_size - self.local_size
+        query = self._apply_rotary_embedding(
+            self.cos_sin_q_cache[positions_with_offsets % chunk_len],
+            query_rot, query_pass)
+        query_succ = self._apply_rotary_embedding(
+            self.cos_sin_qc_cache[positions_with_offsets % chunk_len],
+            query_rot, query_pass)
+        query_inter = self._apply_rotary_embedding(
+            self.cos_sin_qc_cache[chunk_len - 1].repeat(positions.shape[0], 1),
+            query_rot, query_pass)
+        query_succ_critical = self._apply_rotary_embedding(
+            self.cos_sin_qc_no_clamp_cache[positions_with_offsets % chunk_len],
+            query_rot, query_pass)
+        query_inter_critical = self._apply_rotary_embedding(
+            self.cos_sin_q_inter_cache[positions_with_offsets % chunk_len],
+            query_rot, query_pass)
+
+        # merge the five query variants into one tensor along the last dim
+        query = torch.cat((
+            query,
+            query_succ,
+            query_inter,
+            query_succ_critical,
+            query_inter_critical,
+        ),
+                          dim=-1)
+        return query, key
+
+    def _apply_rotary_embedding(self, cos_sin, hidden_rot, hidden_pass):
+        cos, sin = cos_sin.chunk(2, dim=-1)  # split the [cos | sin] halves
+        if self.is_neox_style:
+            # NOTE(woosuk): Here we assume that the positions tensor has the
+            # shape [batch_size, seq_len].
+            cos = cos.repeat(1, 1, 2).unsqueeze(-2)
+            sin = sin.repeat(1, 1, 2).unsqueeze(-2)
+        else:
+            cos = cos.repeat_interleave(2, dim=-1).unsqueeze(-2)
+            sin = sin.repeat_interleave(2, dim=-1).unsqueeze(-2)
+        rotate_fn = rotate_neox if self.is_neox_style else rotate_gptj
+        hidden_rot = hidden_rot * cos + rotate_fn(hidden_rot) * sin
+
+        if self.rotary_dim < self.head_size:
+            hidden = torch.cat((hidden_rot, hidden_pass), dim=-1)
+        else:
+            hidden = hidden_rot
+        return hidden.flatten(-2).squeeze(0)  # merge the last two dims
+
+    def extra_repr(self) -> str:
+        s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}"
+        s += f", max_position_embeddings={self.max_position_embeddings}"
+        s += f", base={self.base}, is_neox_style={self.is_neox_style}"
+        s += f", chunk_size={self.chunk_size}, local_size={self.local_size}"
+        return s
diff --git a/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_alpha_rope.py b/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_alpha_rope.py
new file mode 100644
index 0000000000000..1da39bbd303bd
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_alpha_rope.py
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from .base import RotaryEmbedding
+
+
+class DynamicNTKAlphaRotaryEmbedding(RotaryEmbedding):
+    """Rotary embedding whose base is rescaled by a fixed NTK alpha.
+
+    Derived from the original RotaryEmbedding implementation.
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        scaling_alpha: float,
+        dtype: torch.dtype,
+    ) -> None:
+        # Stored before the parent constructor runs.
+        self.scaling_alpha = scaling_alpha
+        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
+                         is_neox_style, dtype)
+
+    def _compute_cos_sin_cache(self) -> torch.Tensor:
+        """Return the [cos | sin] table built from the alpha-scaled base."""
+        # For Hunyuan DynamicNTKAlphaRotaryEmbedding
+        exponent = self.rotary_dim / (self.rotary_dim - 2)
+        scaled_base = self.base * self.scaling_alpha**exponent
+        inv_freq = self._compute_inv_freq(scaled_base)
+        positions = torch.arange(self.max_position_embeddings,
+                                 dtype=torch.float)
+
+        # Outer product of positions and inverse frequencies.
+        angles = torch.einsum("i,j -> ij", positions, inv_freq)
+        return torch.cat((angles.cos(), angles.sin()), dim=-1)
diff --git a/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
new file mode 100644
index 0000000000000..ec2008b90cfb8
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# Adapted from
+# https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py
+# Copyright 2023 The vLLM team.
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from .base import RotaryEmbedding
+
+
+class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
+    """Rotary embedding whose base is rescaled with Dynamic NTK scaling.
+
+    Credits to the Reddit users /u/bloc97 and /u/emozilla
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        scaling_factor: float,
+        dtype: torch.dtype,
+    ) -> None:
+        # Stored before the parent constructor runs.
+        self.scaling_factor = scaling_factor
+        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
+                         is_neox_style, dtype)
+
+    def _compute_cos_sin_cache(self) -> torch.Tensor:
+        """Return the [cos | sin] table for the scaled maximum length."""
+        # NOTE(woosuk): self.max_position_embeddings is the original
+        # maximum length before applying the rope scaling.
+        # Thus, the maximum length after applying the rope scaling is
+        # self.max_position_embeddings * self.scaling_factor.
+        factor = self.scaling_factor
+        scaled_len = self.max_position_embeddings * factor
+        ntk_ratio = ((factor * scaled_len / self.max_position_embeddings) -
+                     (factor - 1))
+        exponent = self.rotary_dim / (self.rotary_dim - 2)
+        inv_freq = self._compute_inv_freq(self.base * ntk_ratio**exponent)
+
+        positions = torch.arange(scaled_len, dtype=torch.float)
+        angles = torch.einsum("i,j -> ij", positions, inv_freq)
+        # Cache layout: cos in the first half of the last dim, sin after it.
+        return torch.cat((angles.cos(), angles.sin()), dim=-1)
diff --git a/vllm/model_executor/layers/rotary_embedding/linear_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/linear_scaling_rope.py
new file mode 100644
index 0000000000000..6e920991882d4
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/linear_scaling_rope.py
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Union
+
+# Adapted from
+# https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py
+# Copyright 2023 The vLLM team.
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from .base import RotaryEmbedding
+
+
+class LinearScalingRotaryEmbedding(RotaryEmbedding):
+    """RotaryEmbedding extended with linear scaling.
+
+    It supports multiple scaling factors. Since multiple LoRA adapters may have
+    different scaling factors, we need multiple cos/sin caches. In this way,
+    instead of running rotary embedding kernel per lora, we can run multiple
+    lora in a batched way.
+
+    In addition to that, we also keep the cos/sin cache for the scaling factor
+    of 1 (default) at all times.
+
+    Exemplary for three scaling factors x=1, y and z with embeddings
+    [[x11, x12, ... x1m], ..., [xn1, xn2, ..., xnm]] and
+    [[y11, y12, ... y1o], ..., [yn1, yn2, ..., yno]], and
+    [[z11, z12, ... z1p], ..., [zn1, zn2, ..., znp]],
+
+    we construct the cos/sin cache as follows:
+    [[x11, x12, ... x1m, y11, y12, ... y1o, z11, z12, ... z1p],
+        ...
+     [xn1, xn2, ... xnm, yn1, yn2, ... yno, zn1, zn2, ... znp]]
+
+    We then use offsets to index into the cos/sin cache for
+    the respective scaling factors.
+
+    The offset to cache can be accessed via `scaling_factor_to_offset` API.
+
+    Credits to the Reddit user /u/kaiokendev
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        scaling_factors: Union[list[float], float],
+        dtype: torch.dtype,
+    ) -> None:
+        if isinstance(scaling_factors, (int, float)):
+            # A bare number (int or float) means a single scaling factor.
+            scaling_factors = [scaling_factors]
+        self.scaling_factors: list[float] = scaling_factors  # noqa
+        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
+                         is_neox_style, dtype)
+        # Populated by _compute_cos_sin_cache(); only declared here so the
+        # attribute carries a type annotation.
+        self._scaling_factor_to_offset: dict[float, int]
+
+    def _compute_cos_sin_cache(self) -> torch.Tensor:
+        """Build one cache per scaling factor and stack them along dim 0.
+
+        Also records, for every scaling factor, the row offset at which its
+        cache starts inside the concatenated tensor.
+        """
+        inv_freq = self._compute_inv_freq(self.base)
+        cache_list: list[torch.Tensor] = []
+        # offsets[i] is the first row of the cache for scaling_factors[i].
+        offsets: list[int] = []
+        next_offset = 0
+        for scaling_factor in self.scaling_factors:
+            # NOTE(woosuk): self.max_position_embeddings is the original
+            # maximum length before applying the rope scaling.
+            # Thus, the maximum length after applying the rope scaling is
+            # self.max_position_embeddings * scaling_factor.
+            max_len = self.max_position_embeddings * scaling_factor
+            t = torch.arange(max_len, dtype=torch.float) / scaling_factor
+
+            freqs = torch.einsum("i,j -> ij", t, inv_freq)
+            cache = torch.cat((freqs.cos(), freqs.sin()), dim=-1)
+            offsets.append(next_offset)
+            cache_list.append(cache)
+            next_offset += cache.shape[0]
+        assert len(self.scaling_factors) == len(offsets)
+        self._scaling_factor_to_offset = {
+            float(scaling_factor): offset
+            for scaling_factor, offset in zip(self.scaling_factors, offsets)
+        }
+        return torch.cat(cache_list, dim=0)
+
+    @property
+    def scaling_factor_to_offset(self) -> dict[float, int]:
+        """Map each scaling factor to its starting row in the cache."""
+        return self._scaling_factor_to_offset
diff --git a/vllm/model_executor/layers/rotary_embedding/llama3_rope.py b/vllm/model_executor/layers/rotary_embedding/llama3_rope.py
new file mode 100644
index 0000000000000..adcef549bc4c2
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/llama3_rope.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import math
+
+import torch
+
+from .base import RotaryEmbedding
+
+
+class Llama3RotaryEmbedding(RotaryEmbedding):
+    """Rotary embedding with wavelength-dependent frequency rescaling."""
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        dtype: torch.dtype,
+        scaling_factor: float,
+        low_freq_factor: float,
+        high_freq_factor: float,
+        orig_max_position: int,
+    ) -> None:
+        # Scaling parameters are stored before the parent constructor runs.
+        self.scaling_factor = scaling_factor
+        self.low_freq_factor = low_freq_factor
+        self.high_freq_factor = high_freq_factor
+        self.orig_max_position = orig_max_position
+        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
+                         is_neox_style, dtype)
+
+    def _compute_inv_freq(self, base: float) -> torch.Tensor:
+        inv_freqs = super()._compute_inv_freq(base)
+        wave_len = 2 * math.pi / inv_freqs
+        # Wavelength thresholds derived from orig_max_position.
+        low_freq_wavelen = self.orig_max_position / self.low_freq_factor
+        high_freq_wavelen = self.orig_max_position / self.high_freq_factor
+
+        # Interpolation weight for the band between the two thresholds;
+        # it stays 0 when both factors are equal (avoids dividing by zero).
+        smooth = 0
+        if self.low_freq_factor != self.high_freq_factor:
+            smooth = (self.orig_max_position / wave_len - self.low_freq_factor
+                      ) / (self.high_freq_factor - self.low_freq_factor)
+        # Candidate frequencies: fully scaled, and the smooth blend.
+        scaled = inv_freqs / self.scaling_factor
+        blended = ((1 - smooth) * inv_freqs / self.scaling_factor +
+                   smooth * inv_freqs)
+        # Above low_freq_wavelen: scaled; below high_freq_wavelen: unchanged.
+        rescaled = torch.where(wave_len > low_freq_wavelen, scaled, blended)
+        return torch.where(wave_len < high_freq_wavelen, inv_freqs, rescaled)
diff --git a/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py b/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py
new file mode 100644
index 0000000000000..415a85ab698bc
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import math
+from typing import Optional
+
+import torch
+
+from .base import RotaryEmbedding
+
+
+class Llama4VisionRotaryEmbedding(RotaryEmbedding):
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        dtype: torch.dtype,
+    ) -> None:
+        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
+                         is_neox_style, dtype)
+
+    def _compute_inv_freq(self, base: float) -> torch.Tensor:
+        inv_freqs = super()._compute_inv_freq(base)
+        inv_freqs = inv_freqs[:(self.rotary_dim // 2)]
+        return inv_freqs
+
+    def _compute_cos_sin_cache(self) -> torch.Tensor:
+        inv_freq = self._compute_inv_freq(self.base)
+
+        # self.max_position_embeddings here is the number of image patches
+        # i.e. (image_size // patch_size) ** 2
+        num_patches = self.max_position_embeddings
+        img_idx = torch.arange(num_patches,
+                    dtype=torch.int32) \
+                    .reshape(num_patches, 1)
+        img_idx = torch.cat([img_idx, img_idx[:1]], dim=0)  # one extra row
+        img_idx[-1, -1] = -2  # set to ID_CLS_TOKEN
+        num_patches_single_dim = int(math.sqrt(num_patches))
+        frequencies_x = img_idx % num_patches_single_dim
+        frequencies_y = img_idx // num_patches_single_dim
+        freqs_x = ((frequencies_x + 1)[..., None] *
+                   inv_freq[None, None, :]).repeat_interleave(2, dim=-1)
+        freqs_y = ((frequencies_y + 1)[..., None] *
+                   inv_freq[None, None, :]).repeat_interleave(2, dim=-1)
+        freqs = torch.cat([freqs_x, freqs_y],
+                          dim=-1).float().contiguous()[..., ::2]
+        freqs = freqs.masked_fill(img_idx.reshape(-1, 1, 1) < 0, 0)
+        cache = torch.view_as_complex(
+            torch.stack([torch.cos(freqs), torch.sin(freqs)], dim=-1))
+        return cache
+
+    def forward(
+        self,
+        query: torch.Tensor,
+        key: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
+        assert key is not None  # key is required by this variant
+        self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(query.device)
+        query_ = torch.view_as_complex(query.float().reshape(
+            *query.shape[:-1], -1, 2))
+        key_ = torch.view_as_complex(key.float().reshape(
+            *key.shape[:-1], -1, 2))
+        broadcast_shape = [
+            d if i == 1 or i == (query_.ndim - 1) else 1
+            for i, d in enumerate(query_.shape)
+        ]
+        freqs_ci = self.cos_sin_cache.view(*broadcast_shape)
+        query_out = torch.view_as_real(query_ * freqs_ci).flatten(3)
+        key_out = torch.view_as_real(key_ * freqs_ci).flatten(3)
+        return query_out.type_as(query), key_out.type_as(key)
diff --git a/vllm/model_executor/layers/rotary_embedding/mrope.py b/vllm/model_executor/layers/rotary_embedding/mrope.py
new file mode 100644
index 0000000000000..a75b9e5eb435c
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/mrope.py
@@ -0,0 +1,670 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import itertools
+from typing import Optional, Union
+
+import numpy as np
+import torch
+from transformers import PretrainedConfig
+
+from .base import RotaryEmbedding
+from .common import apply_rotary_emb_dispatch
+
+
+class MRotaryEmbedding(RotaryEmbedding):
+    """Rotary Embedding with Multimodal Sections."""
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        dtype: torch.dtype,
+        mrope_section: Optional[list[int]] = None,
+    ) -> None:
+        # In Qwen2.5-VL, the maximum index value is related to the duration of
+        # the input video. We enlarge max_position_embeddings by a factor of 4
+        # to get a larger cos and sin cache.
+        self.cache_max_position_num = max_position_embeddings * 4
+        super().__init__(head_size, rotary_dim, self.cache_max_position_num,
+                         base, is_neox_style, dtype)
+
+        self.mrope_section = mrope_section
+        if self.mrope_section:
+            assert sum(self.mrope_section) == rotary_dim // 2
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
+        """Apply rotary embedding to query and key (PyTorch-native path).
+
+        Args:
+            positions:
+                [num_tokens,] (text only) or
+                [3, num_tokens] (T/H/W positions with multimodal inputs)
+            query: [num_tokens, num_heads * head_size]
+            key: [num_tokens, num_kv_heads * head_size]
+        """
+        assert positions.ndim == 1 or positions.ndim == 2
+        assert key is not None
+
+        num_tokens = positions.shape[-1]
+        cos_sin = self.cos_sin_cache[positions]
+        cos, sin = cos_sin.chunk(2, dim=-1)
+        if positions.ndim == 2:
+            assert self.mrope_section
+            # Section i of the last dim is taken from positions row i.
+            cos = torch.cat([
+                m[i]
+                for i, m in enumerate(cos.split(self.mrope_section, dim=-1))
+            ],
+                            dim=-1)
+            sin = torch.cat([
+                m[i]
+                for i, m in enumerate(sin.split(self.mrope_section, dim=-1))
+            ],
+                            dim=-1)
+        # Only the first rotary_dim features of each head are rotated.
+        query_shape = query.shape
+        query = query.view(num_tokens, -1, self.head_size)
+        query_rot = query[..., :self.rotary_dim]
+        query_pass = query[..., self.rotary_dim:]
+        query_rot = apply_rotary_emb_dispatch(query_rot, cos, sin,
+                                              self.is_neox_style)
+        query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
+
+        key_shape = key.shape
+        key = key.view(num_tokens, -1, self.head_size)
+        key_rot = key[..., :self.rotary_dim]
+        key_pass = key[..., self.rotary_dim:]
+        key_rot = apply_rotary_emb_dispatch(key_rot, cos, sin,
+                                            self.is_neox_style)
+        key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
+        return query, key
+
+    @classmethod
+    def get_input_positions(
+        cls,
+        input_tokens: list[int],
+        hf_config: PretrainedConfig,
+        image_grid_thw: Optional[Union[list[list[int]], torch.Tensor]],
+        video_grid_thw: Optional[Union[list[list[int]], torch.Tensor]],
+        second_per_grid_ts: Optional[list[float]],
+        context_len: int = 0,
+        seq_len: Optional[int] = None,
+        audio_feature_lengths: Optional[torch.Tensor] = None,
+        use_audio_in_video: bool = False,
+    ) -> tuple[list[list[int]], int]:
+        """Get mrope input positions (as nested lists) and delta value."""
+        # Normalize absent (None) inputs to empty lists.
+        if image_grid_thw is None:
+            image_grid_thw = []
+        if video_grid_thw is None:
+            video_grid_thw = []
+        if second_per_grid_ts is None:
+            second_per_grid_ts = []
+
+        positions, delta = cls.get_input_positions_tensor(
+            input_tokens=input_tokens,
+            hf_config=hf_config,
+            image_grid_thw=image_grid_thw,
+            video_grid_thw=video_grid_thw,
+            second_per_grid_ts=second_per_grid_ts,
+            context_len=context_len,
+            seq_len=seq_len,
+            audio_feature_lengths=audio_feature_lengths,
+            use_audio_in_video=use_audio_in_video,
+        )
+        return positions.tolist(), delta
+
+    @classmethod
+    def get_input_positions_tensor(
+        cls,
+        input_tokens: list[int],
+        hf_config: PretrainedConfig,
+        image_grid_thw: Union[list[list[int]], torch.Tensor],
+        video_grid_thw: Union[list[list[int]], torch.Tensor],
+        second_per_grid_ts: list[float],
+        context_len: int = 0,
+        seq_len: Optional[int] = None,
+        audio_feature_lengths: Optional[torch.Tensor] = None,
+        use_audio_in_video: bool = False,
+    ) -> tuple[torch.Tensor, int]:
+        """Dispatch to the omni, GLM4V or generic VL mrope position builder."""
+        from vllm.transformers_utils.config import thinker_uses_mrope
+        if thinker_uses_mrope(hf_config):
+            return cls._omni_get_input_positions_tensor(
+                input_tokens=input_tokens,
+                hf_config=hf_config,
+                image_grid_thw=image_grid_thw,
+                video_grid_thw=video_grid_thw,
+                second_per_grid_ts=second_per_grid_ts,
+                context_len=context_len,
+                seq_len=seq_len,
+                audio_feature_lengths=audio_feature_lengths,
+                use_audio_in_video=use_audio_in_video,
+            )
+        if hf_config.model_type in ["glm4v", "glm4v_moe"]:
+            return cls._glm4v_get_input_positions_tensor(
+                input_tokens=input_tokens,
+                hf_config=hf_config,
+                image_grid_thw=image_grid_thw,
+                video_grid_thw=video_grid_thw,
+                context_len=context_len,
+                seq_len=seq_len,
+            )
+        return cls._vl_get_input_positions_tensor(
+            input_tokens=input_tokens,
+            hf_config=hf_config,
+            image_grid_thw=image_grid_thw,
+            video_grid_thw=video_grid_thw,
+            second_per_grid_ts=second_per_grid_ts,
+            context_len=context_len,
+            seq_len=seq_len,
+        )
+
+    @classmethod
+    def _glm4v_get_input_positions_tensor(
+        cls,
+        input_tokens: list[int],
+        hf_config: PretrainedConfig,
+        image_grid_thw: Union[list[list[int]], torch.Tensor],
+        video_grid_thw: Union[list[list[int]], torch.Tensor],
+        context_len: int = 0,
+        seq_len: Optional[int] = None,
+    ) -> tuple[torch.Tensor, int]:
+        """Get mrope input positions and delta value for GLM4V."""
+
+        image_token_id = hf_config.image_token_id
+        video_start_token_id = hf_config.video_start_token_id
+        video_end_token_id = hf_config.video_end_token_id
+        spatial_merge_size = hf_config.vision_config.spatial_merge_size
+        llm_pos_ids_list: list = []
+
+        if not (image_grid_thw is None and video_grid_thw is None):
+            if isinstance(image_grid_thw, torch.Tensor):
+                image_grid_thw = image_grid_thw.tolist()
+
+            input_token_type: list[str] = []
+            video_check_flg = False
+            for token in input_tokens:
+                if token == video_start_token_id:
+                    video_check_flg = True
+                elif token == video_end_token_id:
+                    video_check_flg = False
+                # Image tokens inside a video span count as video frames.
+                if (token == image_token_id) and (video_check_flg is False):
+                    input_token_type.append("image")
+                elif (token == image_token_id) and (video_check_flg is True):
+                    input_token_type.append("video")
+                else:
+                    input_token_type.append("text")
+            # Merge consecutive same-type tokens into (type, start, end) runs.
+            input_type_group: list[tuple[str, int, int]] = []
+            for key, group_iter in itertools.groupby(
+                    enumerate(input_token_type), lambda x: x[1]):
+                group_list = list(group_iter)
+                start_index = group_list[0][0]
+                end_index = group_list[-1][0] + 1
+                input_type_group.append((key, start_index, end_index))
+
+            video_frame_num = 1
+            mm_data_idx = 0
+            for modality_type, start_idx, end_idx in input_type_group:
+                st_idx = llm_pos_ids_list[-1].max() + 1 if len(
+                    llm_pos_ids_list) > 0 else 0
+                if modality_type == "image":
+                    t, h, w = (
+                        image_grid_thw[mm_data_idx][0],
+                        image_grid_thw[mm_data_idx][1],
+                        image_grid_thw[mm_data_idx][2],
+                    )
+                    llm_grid_t, llm_grid_h, llm_grid_w = \
+                        t, h // spatial_merge_size, w // spatial_merge_size
+
+                    t_index = torch.arange(llm_grid_t).view(-1, 1).expand(
+                        -1, llm_grid_h * llm_grid_w).flatten()
+                    h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand(
+                        llm_grid_t, -1, llm_grid_w).flatten()
+                    w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand(
+                        llm_grid_t, llm_grid_h, -1).flatten()
+                    llm_pos_ids_list.append(
+                        torch.stack([t_index, h_index, w_index]) + st_idx)
+                    mm_data_idx += 1
+                # NOTE(review): video h/w are read from image_grid_thw; confirm.
+                elif modality_type == "video":
+                    t, h, w = (
+                        video_frame_num,
+                        image_grid_thw[mm_data_idx][1],
+                        image_grid_thw[mm_data_idx][2],
+                    )
+                    llm_grid_t, llm_grid_h, llm_grid_w = \
+                        t, h // spatial_merge_size, w // spatial_merge_size
+
+                    for t_idx in range(llm_grid_t):
+                        t_index = torch.tensor(t_idx).view(-1, 1).expand(
+                            -1, llm_grid_h * llm_grid_w).flatten()
+                        h_index = torch.arange(llm_grid_h).view(
+                            1, -1, 1).expand(1, -1, llm_grid_w).flatten()
+                        w_index = torch.arange(llm_grid_w).view(
+                            1, 1, -1).expand(1, llm_grid_h, -1).flatten()
+                        llm_pos_ids_list.append(
+                            torch.stack([t_index, h_index, w_index]) + st_idx)
+
+                    mm_data_idx += 1
+                    video_frame_num += 1
+
+                else:
+                    text_len = end_idx - start_idx
+                    llm_pos_ids_list.append(
+                        torch.arange(text_len).view(1, -1).expand(3, -1) +
+                        st_idx)
+                    video_frame_num = 1
+
+        else:
+            text_len = len(input_tokens)
+            llm_pos_ids_list.append(
+                torch.arange(text_len).view(1, -1).expand(3, -1))
+        # Compute the delta on the full sequence, before windowing it.
+        llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
+        mrope_position_delta = (llm_positions.max() + 1 -
+                                len(input_tokens)).item()
+        llm_positions = llm_positions[:, context_len:seq_len]
+        return llm_positions, mrope_position_delta
+
+    @classmethod
+    def _vl_get_input_positions_tensor(
+        cls,
+        input_tokens: list[int],
+        hf_config: PretrainedConfig,
+        image_grid_thw: Union[list[list[int]], torch.Tensor],
+        video_grid_thw: Union[list[list[int]], torch.Tensor],
+        second_per_grid_ts: list[float],
+        context_len: int = 0,
+        seq_len: Optional[int] = None,
+    ) -> tuple[torch.Tensor, int]:
+        """Get mrope input positions and delta value."""
+
+        image_token_id = hf_config.image_token_id
+        video_token_id = hf_config.video_token_id
+        vision_start_token_id = hf_config.vision_start_token_id
+        spatial_merge_size = hf_config.vision_config.spatial_merge_size
+        tokens_per_second = getattr(hf_config.vision_config,
+                                    "tokens_per_second", 1.0)
+
+        input_tokens_tensor = torch.tensor(input_tokens)
+        vision_start_indices = torch.argwhere(
+            input_tokens_tensor == vision_start_token_id).squeeze(1)
+        vision_tokens = input_tokens_tensor[vision_start_indices + 1]
+        image_nums = (vision_tokens == image_token_id).sum()
+        video_nums = (vision_tokens == video_token_id).sum()
+        llm_pos_ids_list: list = []
+
+        st = 0
+        remain_images, remain_videos = image_nums, video_nums
+
+        image_index, video_index = 0, 0
+        for _ in range(image_nums + video_nums):
+            video_second_per_grid_t = 0.0
+            if image_token_id in input_tokens and remain_images > 0:
+                ed_image = input_tokens.index(image_token_id, st)
+            else:
+                ed_image = len(input_tokens) + 1
+            if video_token_id in input_tokens and remain_videos > 0:
+                ed_video = input_tokens.index(video_token_id, st)
+            else:
+                ed_video = len(input_tokens) + 1
+            if ed_image < ed_video:
+                t, h, w = (
+                    image_grid_thw[image_index][0],
+                    image_grid_thw[image_index][1],
+                    image_grid_thw[image_index][2],
+                )
+                image_index += 1
+                remain_images -= 1
+                ed = ed_image
+            else:
+                t, h, w = (
+                    video_grid_thw[video_index][0],
+                    video_grid_thw[video_index][1],
+                    video_grid_thw[video_index][2],
+                )
+                video_second_per_grid_t = 1.0
+                if second_per_grid_ts:
+                    video_second_per_grid_t = second_per_grid_ts[video_index]
+                video_index += 1
+                remain_videos -= 1
+                ed = ed_video
+
+            llm_grid_t, llm_grid_h, llm_grid_w = \
+                t, h // spatial_merge_size, w // spatial_merge_size
+            text_len = ed - st
+            # Text before this item continues from the previous max id.
+            st_idx = llm_pos_ids_list[-1].max() + 1 if len(
+                llm_pos_ids_list) > 0 else 0
+            llm_pos_ids_list.append(
+                torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx)
+            # Temporal ids scale with seconds-per-grid and tokens_per_second.
+            t_index = (torch.arange(llm_grid_t).view(-1, 1).expand(
+                -1, llm_grid_h * llm_grid_w) * video_second_per_grid_t *
+                       tokens_per_second).long().flatten()
+
+            h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand(
+                llm_grid_t, -1, llm_grid_w).flatten()
+            w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand(
+                llm_grid_t, llm_grid_h, -1).flatten()
+            llm_pos_ids_list.append(
+                torch.stack([t_index, h_index, w_index]) + text_len + st_idx)
+            st = ed + llm_grid_t * llm_grid_h * llm_grid_w
+        # Text remaining after the last image/video.
+        if st < len(input_tokens):
+            st_idx = llm_pos_ids_list[-1].max() + 1 if len(
+                llm_pos_ids_list) > 0 else 0
+            text_len = len(input_tokens) - st
+            llm_pos_ids_list.append(
+                torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx)
+
+        llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
+        mrope_position_delta = (llm_positions.max() + 1 -
+                                len(input_tokens)).item()
+        llm_positions = llm_positions[:, context_len:seq_len]
+
+        return llm_positions, mrope_position_delta
+
+    @classmethod
+    def _omni_get_input_positions_tensor(
+        cls,
+        input_tokens: list[int],
+        hf_config: PretrainedConfig,
+        image_grid_thw: Union[list[list[int]], torch.Tensor],
+        video_grid_thw: Union[list[list[int]], torch.Tensor],
+        second_per_grid_ts: Optional[list[float]] = None,
+        context_len: int = 0,
+        seq_len: Optional[int] = None,
+        audio_feature_lengths: Optional[torch.Tensor] = None,
+        use_audio_in_video: bool = False,
+    ) -> tuple[torch.Tensor, int]:
+        """Get mrope input positions and delta value (Qwen2.5-Omni version).
+
+        Differences from MRotaryEmbedding:
+            1. Add audio support (and related `audio_feature_lengths`).
+            2. Add `use_audio_in_video` option to read audio from video inputs.
+                In this case, audio and vision position ids will be split into
+                chunks and interleaved.
+
+        Example:
+
+            (V_i are vision position ids, A_i are audio position ids)
+
+            |V_1 ...    V_n|A_1 ...   A_n|V_n+1 ... V_2n|A_n+1 ... A_2n|...
+            |vision chunk 1|audio chunk 1|vision chunk 2|audio chunk 2 |...
+        """
+
+        # TODO(fyabc): refactor and share more code with
+        #  _vl_get_input_positions_tensor.
+
+        thinker_config = hf_config.thinker_config
+        audio_token_id = thinker_config.audio_token_index
+        image_token_id = thinker_config.image_token_index
+        video_token_id = thinker_config.video_token_index
+        audio_start_token_id = thinker_config.audio_start_token_id
+        audio_end_token_id = thinker_config.audio_end_token_id
+        vision_start_token_id = thinker_config.vision_start_token_id
+        vision_end_token_id = thinker_config.vision_end_token_id
+        seconds_per_chunk = thinker_config.seconds_per_chunk
+        spatial_merge_size = thinker_config.vision_config.spatial_merge_size
+        tokens_per_second = getattr(thinker_config.vision_config,
+                                    "tokens_per_second", 25)
+
+        if isinstance(image_grid_thw, list):
+            image_grid_thw = torch.tensor(image_grid_thw)
+        if isinstance(video_grid_thw, list):
+            video_grid_thw = torch.tensor(video_grid_thw)
+
+        src_item = input_tokens
+        audio_seqlens = audio_feature_lengths
+        if not second_per_grid_ts:
+            second_per_grid_ts = [1] * video_grid_thw.shape[0]
+        audio_idx = 0
+        video_idx = 0
+        image_idx = 0
+        new_src_item: list[int] = []
+        llm_pos_ids_list: list[torch.Tensor] = []
+
+        idx = 0
+        while idx < len(src_item):
+            new_src_item_len = len(new_src_item)
+            start_idx = llm_pos_ids_list[-1].max() + 1 if len(
+                llm_pos_ids_list) > 0 else 0
+            if src_item[idx] not in [
+                    audio_token_id, video_token_id, image_token_id
+            ]:
+                if use_audio_in_video and idx > 0:
+                    if src_item[idx] == vision_end_token_id and \
+                        src_item[idx - 1] == audio_end_token_id:
+                        # processing the <|audio_eos|> before <|vision_eos|>
+                        start_idx -= 1
+                    elif src_item[idx] == audio_start_token_id and \
+                        src_item[idx - 1] == vision_start_token_id:
+                        # processing the <|audio_bos|> after <|vision_bos|>
+                        start_idx -= 1
+                new_src_item.append(src_item[idx])
+                llm_pos_ids = torch.tensor([start_idx],
+                                           dtype=torch.long).expand(3, -1)
+                llm_pos_ids_list.append(llm_pos_ids)
+            elif src_item[idx] == audio_token_id:
+                assert audio_seqlens is not None
+                audio_seqlen = audio_seqlens[audio_idx]
+                place_num = (((audio_seqlen - 1) // 2 + 1 - 2) // 2 + 1)
+                new_src_item.extend([audio_token_id] * place_num)
+                llm_pos_ids = torch.arange(place_num).expand(3, -1) + start_idx
+                llm_pos_ids_list.append(llm_pos_ids)
+                audio_idx += 1
+            elif src_item[idx] == image_token_id:
+                grid_t = image_grid_thw[image_idx][0]
+                grid_hs = image_grid_thw[:, 1]
+                grid_ws = image_grid_thw[:, 2]
+                t_index = (torch.arange(grid_t) * 1 * tokens_per_second).long()
+                llm_pos_ids = cls._get_llm_pos_ids_for_vision(
+                    start_idx, image_idx, spatial_merge_size, t_index, grid_hs,
+                    grid_ws)
+                llm_pos_ids_list.append(llm_pos_ids)
+                vision_seqlen = image_grid_thw[image_idx].prod() // (
+                    spatial_merge_size**2)
+                new_src_item.extend([image_token_id] * vision_seqlen)
+                image_idx += 1
+            elif src_item[idx] == video_token_id and not use_audio_in_video:
+                grid_t = video_grid_thw[video_idx][0]
+                grid_hs = video_grid_thw[:, 1]
+                grid_ws = video_grid_thw[:, 2]
+                t_index = (torch.arange(grid_t) *
+                           second_per_grid_ts[video_idx] *
+                           tokens_per_second).long()
+                llm_pos_ids = cls._get_llm_pos_ids_for_vision(
+                    start_idx, video_idx, spatial_merge_size, t_index, grid_hs,
+                    grid_ws)
+                llm_pos_ids_list.append(llm_pos_ids)
+                vision_seqlen = video_grid_thw[video_idx].prod() // (
+                    spatial_merge_size**2)
+                new_src_item.extend([video_token_id] * vision_seqlen)
+                video_idx += 1
+            else:
+                # read audio from video
+                assert audio_seqlens is not None
+                audio_seqlen = audio_seqlens[audio_idx]
+                vision_seqlen = video_grid_thw[video_idx].prod() // (
+                    spatial_merge_size**2)
+                grid_t = video_grid_thw[video_idx][0]
+                grid_h = video_grid_thw[video_idx][1]
+                grid_w = video_grid_thw[video_idx][2]
+                grid_hs = video_grid_thw[:, 1]
+                grid_ws = video_grid_thw[:, 2]
+                t_ntoken_per_chunk = int(tokens_per_second * seconds_per_chunk)
+                t_index = (torch.arange(grid_t) *
+                           second_per_grid_ts[video_idx] *
+                           tokens_per_second).long()
+                t_index_split_chunk = cls._split_list_into_ranges(
+                    t_index, t_ntoken_per_chunk)
+                place_num = (((audio_seqlen - 1) // 2 + 1 - 2) // 2 + 1) + 2
+                pure_audio_len = place_num - 2
+                added_audio_len = 0
+                audio_llm_pos_ids_list: list[torch.Tensor] = []
+                for t_chunk in t_index_split_chunk:
+                    vision_ntoken_per_chunk = len(
+                        t_chunk) * grid_h * grid_w // (spatial_merge_size**2)
+                    new_src_item.extend([video_token_id] *
+                                        vision_ntoken_per_chunk)
+                    vision_llm_pos_ids_list = cls._get_llm_pos_ids_for_vision(
+                        start_idx, video_idx, spatial_merge_size, t_chunk,
+                        grid_hs, grid_ws).split(1, dim=1)
+                    llm_pos_ids_list.extend(vision_llm_pos_ids_list)
+                    new_src_item.extend(
+                        min(t_ntoken_per_chunk, pure_audio_len -
+                            added_audio_len) * [audio_token_id])
+                    audio_start_idx = start_idx if len(
+                        audio_llm_pos_ids_list
+                    ) == 0 else audio_llm_pos_ids_list[-1][0].item() + 1
+                    if min(t_ntoken_per_chunk,
+                           pure_audio_len - added_audio_len) > 0:
+                        audio_llm_pos_ids_list = (torch.arange(
+                            min(t_ntoken_per_chunk, pure_audio_len -
+                                added_audio_len)).expand(3, -1) +
+                                                  audio_start_idx).split(1,
+                                                                         dim=1)
+                    else:
+                        audio_llm_pos_ids_list = []
+                    added_audio_len += min(t_ntoken_per_chunk,
+                                           pure_audio_len - added_audio_len)
+                    llm_pos_ids_list.extend(audio_llm_pos_ids_list)
+                if added_audio_len < pure_audio_len:
+                    new_src_item.extend(
+                        (pure_audio_len - added_audio_len) * [audio_token_id])
+                    audio_llm_pos_ids_list = (
+                        torch.arange(pure_audio_len - added_audio_len).expand(
+                            3, -1) + llm_pos_ids_list[-1].max() + 1).split(
+                                1, dim=1)
+                    llm_pos_ids_list.extend(audio_llm_pos_ids_list)
+                audio_idx += 1
+                video_idx += 1
+            # move to the next token
+            idx += len(new_src_item) - new_src_item_len
+        # Delta uses the full sequence and is returned as a plain int.
+        llm_positions = torch.cat(llm_pos_ids_list, dim=1)
+        mrope_position_delta = (llm_positions.max() + 1 -
+                                len(src_item)).item()
+        llm_positions = llm_positions[:, context_len:seq_len]
+
+        return llm_positions, mrope_position_delta
+
+    @staticmethod
+    def _get_llm_pos_ids_for_vision(
+        start_idx: int,
+        vision_idx: int,
+        spatial_merge_size: int,
+        t_index: list[int],
+        grid_hs: torch.Tensor,
+        grid_ws: torch.Tensor,
+    ) -> torch.Tensor:
+        """Build position ids for one vision item.
+
+        Returns the stacked t/h/w index rows, each shifted by `start_idx`.
+        """
+        out_h = grid_hs[vision_idx] // spatial_merge_size
+        out_w = grid_ws[vision_idx] // spatial_merge_size
+        n_t = len(t_index)
+        rows = torch.arange(out_h).view(1, -1, 1).expand(n_t, -1, out_w)
+        cols = torch.arange(out_w).view(1, 1, -1).expand(n_t, out_h, -1)
+        times = torch.Tensor(t_index).to(out_h.device).view(-1, 1).expand(
+            -1, out_h * out_w).long()
+        ids = torch.stack([times.flatten(), rows.flatten(), cols.flatten()])
+        return torch.cat([ids + start_idx], dim=1)
+
+    @staticmethod
+    def _split_list_into_ranges(lst: torch.Tensor,
+                                interval: int) -> list[list[int]]:
+        """Bucket each value of `lst` by `value // interval`."""
+        num_buckets = (max(lst) // interval) + 1
+        buckets: list[list[int]] = [[] for _ in range(num_buckets)]
+        for value in lst:
+            buckets[value // interval].append(value)
+        return buckets
+
+    @staticmethod
+    def get_next_input_positions(
+        mrope_position_delta: int,
+        context_len: int,
+        seq_len: int,
+    ) -> list[list[int]]:
+        """Return three identical rows of delta-shifted positions."""
+        first = context_len + mrope_position_delta
+        last = seq_len + mrope_position_delta
+        # One freshly built list per row, covering [first, last).
+        return [list(range(first, last)) for _ in range(3)]
+
+    @staticmethod
+    def get_next_input_positions_tensor(out: np.ndarray, out_offset: int,
+                                        mrope_position_delta: int,
+                                        context_len: int, num_new_tokens: int):
+        """Write the next `num_new_tokens` positions into `out` in place."""
+        first = mrope_position_delta + context_len
+        new_positions = np.arange(first, first + num_new_tokens,
+                                  dtype=out.dtype)
+        out[:, out_offset:out_offset + num_new_tokens] = new_positions
+
+    @classmethod
+    def omni_get_updates_use_audio_in_video(
+        cls,
+        thinker_config: PretrainedConfig,
+        audio_len: int,
+        video_grid_thw: Union[list[int], torch.Tensor],
+        video_second_per_grid_t: float,
+    ) -> list[int]:
+        """Get video prompt updates when `use_audio_in_video` is True.
+
+        In this case, audio and vision update ids will be split into
+        chunks and interleaved (details in `_omni_get_input_positions_tensor`).
+
+        <|video_bos|><|VIDEO|><|video_eos|> =>
+        <|video_bos|><|audio_bos|>(... chunks ...)<|audio_eos|><|video_eos|>
+        """
+
+        audio_token_id = thinker_config.audio_token_index
+        video_token_id = thinker_config.video_token_index
+        audio_start_token_id = thinker_config.audio_start_token_id
+        audio_end_token_id = thinker_config.audio_end_token_id
+        seconds_per_chunk = thinker_config.seconds_per_chunk
+        spatial_merge_size = thinker_config.vision_config.spatial_merge_size
+        tokens_per_second = getattr(thinker_config.vision_config,
+                                    "tokens_per_second", 25)
+
+        grid_t = video_grid_thw[0]
+        grid_h = video_grid_thw[1]
+        grid_w = video_grid_thw[2]
+        t_ntoken_per_chunk = int(tokens_per_second * seconds_per_chunk)
+        t_index = (torch.arange(grid_t) * video_second_per_grid_t *
+                   tokens_per_second).long()
+        t_index_split_chunk = cls._split_list_into_ranges(
+            t_index, t_ntoken_per_chunk)
+        # Audio start token, then per chunk: video tokens followed by audio.
+        updates = [audio_start_token_id]
+        added_audio_len = 0
+        for t_chunk in t_index_split_chunk:
+            vision_ntoken_per_chunk = len(t_chunk) * grid_h * grid_w // (
+                spatial_merge_size**2)
+            updates.extend([video_token_id] * vision_ntoken_per_chunk)
+            # Add up to one chunk's worth of the audio tokens still left.
+            audio_chunk_size = min(t_ntoken_per_chunk,
+                                   audio_len - added_audio_len)
+            updates.extend(audio_chunk_size * [audio_token_id])
+            added_audio_len += audio_chunk_size
+        if added_audio_len < audio_len:
+            updates.extend((audio_len - added_audio_len) * [audio_token_id])
+        updates.extend([audio_end_token_id])
+
+        return updates
diff --git a/vllm/model_executor/layers/rotary_embedding/ntk_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/ntk_scaling_rope.py
new file mode 100644
index 0000000000000..42926bad22ef6
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/ntk_scaling_rope.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Optional
+
+import torch
+
+from .base import RotaryEmbedding
+
+
+class NTKScalingRotaryEmbedding(RotaryEmbedding):
+    """RotaryEmbedding extended with fixed and mixed NTK scaling.
+    https://kexue.fm/archives/9706 """
+
+    def __init__(self,
+                 head_size: int,
+                 rotary_dim: int,
+                 max_position_embeddings: int,
+                 base: float,
+                 is_neox_style: bool,
+                 scaling_factor: float,
+                 dtype: torch.dtype,
+                 mixed_b: Optional[float] = None) -> None:
+        self.scaling_factor = scaling_factor
+        self.mixed_b = mixed_b
+        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
+                         is_neox_style, dtype)
+
+    def _compute_inv_freq(self, base: float) -> torch.Tensor:
+        base = self.base * (self.scaling_factor if self.mixed_b is None else 1)
+        inv_freq = super()._compute_inv_freq(base)
+        # Fixed scaling when mixed_b is None, per-frequency scaling otherwise.
+        if self.mixed_b is None:
+            inv_freq = inv_freq / self.scaling_factor**(2 / self.rotary_dim)
+        else:
+            a = torch.tensor(self.scaling_factor).log() / (self.rotary_dim /
+                                                           2)**self.mixed_b
+            lambda_1_m = (a * torch.arange(
+                1, self.rotary_dim // 2 + 1).float()**self.mixed_b).exp()
+            inv_freq = inv_freq / lambda_1_m
+
+        return inv_freq
diff --git a/vllm/model_executor/layers/rotary_embedding/phi3_long_rope_scaled_rope.py b/vllm/model_executor/layers/rotary_embedding/phi3_long_rope_scaled_rope.py
new file mode 100644
index 0000000000000..9c36d633e2a9f
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/phi3_long_rope_scaled_rope.py
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import math
+from typing import Optional
+
+import torch
+import torch.nn as nn
+
+from .common import rotate_neox
+
+
+class Phi3LongRoPEScaledRotaryEmbedding(nn.Module):
+    """Phi3 family of models scaled rotary embedding.
+
+    Based on the original RotaryEmbedding implementation.
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        original_max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        dtype: torch.dtype,
+        short_factor: list[float],
+        long_factor: list[float],
+        short_mscale: Optional[float] = None,
+        long_mscale: Optional[float] = None,
+    ):
+        super().__init__()
+
+        if is_neox_style is False:
+            raise ValueError(
+                "`Phi3LongRoPEScaledRotaryEmbedding` only supports neox_style."
+            )
+
+        self.rotary_dim = rotary_dim
+        self.head_size = head_size
+        self.max_position_embeddings = max_position_embeddings
+        self.original_max_position_embeddings = original_max_position_embeddings
+        self.base = base
+        self.short_factor = short_factor
+        self.long_factor = long_factor
+
+        scale = self.max_position_embeddings / \
+                self.original_max_position_embeddings
+        if scale <= 1.0:
+            scaling_factor = 1.0
+        else:
+            scaling_factor = math.sqrt(
+                1 + math.log(scale) /
+                math.log(self.original_max_position_embeddings))
+        if short_mscale is None:
+            short_mscale = scaling_factor
+        if long_mscale is None:
+            long_mscale = scaling_factor
+
+        self.short_mscale = short_mscale
+        self.long_mscale = long_mscale
+
+        short_cache = self._compute_cos_sin_cache(
+            original_max_position_embeddings, short_factor, short_mscale)
+        short_cache = short_cache.to(dtype)
+
+        long_cache = self._compute_cos_sin_cache(max_position_embeddings,
+                                                 long_factor, long_mscale)
+        long_cache = long_cache.to(dtype)
+
+        long_short_cache = torch.cat([short_cache, long_cache], dim=0)
+        self.register_buffer("long_short_cos_sin_cache",
+                             long_short_cache,
+                             persistent=False)
+
+    def _compute_inv_freq(self, rescale_factors: list[float]) -> torch.Tensor:
+        rescale_factors = torch.tensor(rescale_factors, dtype=torch.float32)
+        inv_freq = 1.0 / (rescale_factors * (self.base**(torch.arange(
+            0, self.rotary_dim, 2, dtype=torch.float) / self.rotary_dim)))
+        return inv_freq
+
+    def _compute_cos_sin_cache(
+        self,
+        max_position_embeddings: int,
+        rescale_factors: list[float],
+        mscale: float,
+    ) -> torch.Tensor:
+        inv_freq = self._compute_inv_freq(rescale_factors)
+        t = torch.arange(max_position_embeddings, dtype=torch.float)
+        freqs = torch.einsum("i,j -> ij", t, inv_freq)
+        cos = freqs.cos() * mscale
+        sin = freqs.sin() * mscale
+        cache = torch.cat((cos, sin), dim=-1)
+        return cache
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: Optional[torch.Tensor] = None,
+        offsets: Optional[torch.Tensor] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
+        assert key is not None
+        query = query.view(*query.shape[:-1], -1, self.head_size)
+        key = key.view(*key.shape[:-1], -1, self.head_size)
+
+        k = self.original_max_position_embeddings
+        long_prompt_offset = (torch.any(positions > k).float() *
+                              torch.full_like(positions, k)).long()
+        idx = (torch.add(positions, long_prompt_offset)
+               if long_prompt_offset is not None else positions)
+        idx = torch.add(idx, offsets) if offsets is not None else idx
+        cos_sin = torch.index_select(self.long_short_cos_sin_cache, 0, idx)
+
+        cos, sin = cos_sin.chunk(2, dim=-1)
+        cos = cos.repeat(1, 2).unsqueeze(-2)
+        sin = sin.repeat(1, 2).unsqueeze(-2)
+
+        query_rot = query[..., :self.rotary_dim]
+        query_pass = query[..., self.rotary_dim:]
+        query_rot = query_rot * cos + rotate_neox(query_rot) * sin
+        query = torch.cat((query_rot, query_pass), dim=-1)
+
+        key_rot = key[..., :self.rotary_dim]
+        key_pass = key[..., self.rotary_dim:]
+        key_rot = key_rot * cos + rotate_neox(key_rot) * sin
+        key = torch.cat((key_rot, key_pass), dim=-1)
+
+        return query.flatten(-2), key.flatten(-2)
diff --git a/vllm/model_executor/layers/rotary_embedding/yarn_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/yarn_scaling_rope.py
new file mode 100644
index 0000000000000..851565c5667a4
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/yarn_scaling_rope.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from .base import RotaryEmbedding
+from .common import (yarn_find_correction_range, yarn_get_mscale,
+                     yarn_linear_ramp_mask)
+
+
+class YaRNScalingRotaryEmbedding(RotaryEmbedding):
+    """RotaryEmbedding extended with YaRN method.
+
+    Credits to Peng et al. github.com/jquesnelle/yarn
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        scaling_factor: float,
+        dtype: torch.dtype,
+        *,
+        extrapolation_factor: float = 1,
+        attn_factor: float = 1,
+        beta_fast: int = 32,
+        beta_slow: int = 1,
+    ) -> None:
+        self.scaling_factor = scaling_factor
+        self.extrapolation_factor = extrapolation_factor
+        self.attn_factor = attn_factor
+        self.beta_fast = beta_fast
+        self.beta_slow = beta_slow
+        # Get n-d magnitude scaling corrected for interpolation
+        self.mscale = float(yarn_get_mscale(self.scaling_factor) * attn_factor)
+        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
+                         is_neox_style, dtype)
+
+    def _compute_inv_freq(self, scaling_factor: float) -> torch.Tensor:
+        pos_freqs = self.base**(
+            torch.arange(0, self.rotary_dim, 2, dtype=torch.float) /
+            self.rotary_dim)
+        inv_freq_extrapolation = 1.0 / pos_freqs
+        inv_freq_interpolation = 1.0 / (scaling_factor * pos_freqs)
+
+        low, high = yarn_find_correction_range(self.beta_fast, self.beta_slow,
+                                               self.rotary_dim, self.base,
+                                               self.max_position_embeddings)
+        # Get n-d rotational scaling corrected for extrapolation
+        inv_freq_mask = (1 - yarn_linear_ramp_mask(
+            low, high, self.rotary_dim // 2,
+            dtype=torch.float)) * self.extrapolation_factor
+        inv_freq = inv_freq_interpolation * (
+            1 - inv_freq_mask) + inv_freq_extrapolation * inv_freq_mask
+        return inv_freq
+
+    def _compute_cos_sin_cache(self) -> torch.Tensor:
+        inv_freq = self._compute_inv_freq(self.scaling_factor)
+        t = torch.arange(self.max_position_embeddings * self.scaling_factor,
+                         dtype=torch.float32)
+        freqs = torch.einsum("i,j -> ij", t, inv_freq)
+        cos = (freqs.cos() * self.mscale)
+        sin = (freqs.sin() * self.mscale)
+        cache = torch.cat((cos, sin), dim=-1)
+        return cache

From 031ca762d7bdb566917c8aa39a0294fea89c55ed Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Date: Mon, 4 Aug 2025 22:12:28 -0400
Subject: [PATCH 204/224] [ROCm][Bugfix] Compilation passes fix (#22202)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
---
 vllm/compilation/pass_manager.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py
index 54f00d5415216..e07e52be9fdf6 100644
--- a/vllm/compilation/pass_manager.py
+++ b/vllm/compilation/pass_manager.py
@@ -7,11 +7,13 @@ from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 
-if current_platform.is_cuda():
+if current_platform.is_cuda_alike():
     from .fusion import FusionPass
-    from .collective_fusion import AllReduceFusionPass, AsyncTPPass
     from .fusion_attn import AttnFusionPass
 
+if current_platform.is_cuda():
+    from .collective_fusion import AllReduceFusionPass, AsyncTPPass
+
 from .activation_quant_fusion import ActivationQuantFusionPass
 from .fix_functionalization import FixFunctionalizationPass
 from .inductor_pass import CustomGraphPass, InductorPass, get_pass_context

From 6fa41e0c32f3f1b3d4f146c7f6a9872dcf9d0968 Mon Sep 17 00:00:00 2001
From: Yuxuan Zhang <2448370773@qq.com>
Date: Tue, 5 Aug 2025 10:12:38 +0800
Subject: [PATCH 205/224] self.gate dtype update for GLM-4.5 (#22203)

Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
---
 docs/models/supported_models.md        | 2 +-
 tests/models/registry.py               | 2 +-
 vllm/model_executor/models/glm4_moe.py | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index be3d51a025edf..017a339ffca0c 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -606,7 +606,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `zai-org/glm-4v-9b`, `zai-org/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `Glm4v_moeForConditionalGeneration` | GLM-4.5V | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5V-Air`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4v_moeForConditionalGeneration` | GLM-4.5V | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5V`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ |
 | `H2OVLChatModel` | H2OVL | T + I<sup>E+</sup> | `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc. | | ✅︎ | ✅︎ |
 | `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |
diff --git a/tests/models/registry.py b/tests/models/registry.py
index d86bd20fb0e34..47057d32e9cd7 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -383,7 +383,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
     "Glm4vForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.1V-9B-Thinking"),  # noqa: E501
-    "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
+    "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V",
                                           is_available_online=False),   # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
                                       trust_remote_code=True,
diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py
index c702684c6caa1..bd3e27662ee7c 100644
--- a/vllm/model_executor/models/glm4_moe.py
+++ b/vllm/model_executor/models/glm4_moe.py
@@ -123,6 +123,7 @@ class Glm4MoE(nn.Module):
                                      config.n_routed_experts,
                                      bias=False,
                                      quant_config=None,
+                                     params_dtype=torch.float32,
                                      prefix=f"{prefix}.gate")
 
         self.gate.e_score_correction_bias = nn.Parameter(
@@ -180,7 +181,7 @@ class Glm4MoE(nn.Module):
 
         if self.n_shared_experts is not None:
             shared_output = self.shared_experts(hidden_states)
-        router_logits, _ = self.gate(hidden_states)
+        router_logits, _ = self.gate(hidden_states.to(dtype=torch.float32))
         final_hidden_states = self.experts(
             hidden_states=hidden_states,
             router_logits=router_logits) * self.routed_scaling_factor

From d7b28f34153a5116174383d97e41a1279b51e5cb Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Mon, 4 Aug 2025 22:13:19 -0400
Subject: [PATCH 206/224] [Log] DeepGEMM Update Log for Unaligned Problem Size
 (#22208)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 .../layers/fused_moe/deep_gemm_moe.py         | 21 +++++++++++++++++--
 .../layers/fused_moe/fused_moe.py             |  6 ++----
 .../layers/fused_moe/triton_deep_gemm_moe.py  |  4 ++--
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
index bd3605378b6dc..ba7105c83a92f 100644
--- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
@@ -33,7 +33,7 @@ def deep_gemm_block_shape() -> list[int]:
     return [block, block]
 
 
-def _valid_deep_gemm_shape(M: int, N: int, K: int):
+def _valid_deep_gemm_shape(M: int, N: int, K: int) -> bool:
     align = deep_gemm_block_shape()[0]
     return align <= M and N % align == 0 and K % align == 0
 
@@ -51,9 +51,26 @@ def _valid_deep_gemm(hidden_states: torch.Tensor, w1: torch.Tensor,
 
     M = hidden_states.size(0)
     _, K, N = w2.size()
+
+    align = deep_gemm_block_shape()[0]
+
     if not _valid_deep_gemm_shape(M, N, K):
         logger.debug_once(
-            "DeepGemm disabled: unaligned problem size. M: %s, N: %s, K: %s",
+            "DeepGemm disabled due to unaligned problem size. "
+            "M: %s, N: %s, K: %s. M should be >= align size "
+            "and N and K must be multiples of %s. "
+            "This is not an error and we will fall back to triton.",
+            M,
+            N,
+            K,
+            align,
+        )
+        return False
+    elif N <= 512:
+        logger.debug_once(
+            "DeepGemm disabled for N <= 512. M: %s, N: %s, K: %s. "
+            "This means we will fallback to triton "
+            "for this specific shape for further speed up.",
             M,
             N,
             K,
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 56d1dfe135b3b..597af08c3c9fa 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1360,10 +1360,8 @@ def fused_experts(
     # E8M0 scale, which means we requantize the weight and input to the specific
     # scale. Fallen back to cutlass or triton for some cases would cause
     # accuracy issue.
-    N = w1.size(1)
-    should_use_deep_gemm = ((N > 512
-                             and _valid_deep_gemm(hidden_states, w1, w2))
-                            or is_blackwell_deep_gemm_used())
+    should_use_deep_gemm = is_blackwell_deep_gemm_used() or _valid_deep_gemm(
+        hidden_states, w1, w2)
     if (allow_deep_gemm and use_fp8_w8a8 and should_use_deep_gemm):
         assert apply_router_weight_on_input is False
         assert is_act_and_mul, (
diff --git a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py
index 1b31368c79cd5..c67f7e808301a 100644
--- a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py
@@ -107,8 +107,8 @@ class TritonOrDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
         # Note: the deep gemm workspaces are strictly larger than the triton
         # workspaces so we can be pessimistic here and allocate for DeepGemm
         # even if we fall back to triton later, e.g. if expert maps are set.
-        if self.allow_deep_gemm and (_valid_deep_gemm_shape(M, N, K)
-                                     or is_blackwell_deep_gemm_used()):
+        if self.allow_deep_gemm and (is_blackwell_deep_gemm_used()
+                                     or _valid_deep_gemm_shape(M, N, K)):
             assert self.deep_gemm_expert is not None
             return self.deep_gemm_expert.workspace_shapes(
                 a, aq, M, N, K, topk, global_num_experts, local_num_experts,

From 8a6e108e76aed89ea23c345bd8fc46d904911e7c Mon Sep 17 00:00:00 2001
From: tlipoca9 <160737620+tlipoca9@users.noreply.github.com>
Date: Tue, 5 Aug 2025 10:15:31 +0800
Subject: [PATCH 207/224] fix: kimi_k2 return empty tool call list (#22149)

Signed-off-by: tlipoca9 <tlipoca9@gmail.com>
---
 .../openai/tool_parsers/kimi_k2_tool_parser.py            | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
index b0df442dd8644..834b33052b45d 100644
--- a/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
@@ -38,15 +38,15 @@ class KimiK2ToolParser(ToolParser):
         self.tool_call_end_token: str = "<|tool_call_end|>"
 
         self.tool_call_regex = re.compile(
-            r"<\|tool_call_begin\|>\s*(?P<tool_call_id>[\w\.]+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>.*?)\s*<\|tool_call_end\|>"
+            r"<\|tool_call_begin\|>\s*(?P<tool_call_id>.+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>.*?)\s*<\|tool_call_end\|>"
         )
 
         self.stream_tool_call_portion_regex = re.compile(
-            r"(?P<tool_call_id>[\w\.]+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>.*)"
+            r"(?P<tool_call_id>.+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>.*)"
         )
 
         self.stream_tool_call_name_regex = re.compile(
-            r"(?P<tool_call_id>[\w\.]+:\d+)\s*")
+            r"(?P<tool_call_id>.+:\d+)\s*")
 
         if not self.model_tokenizer:
             raise ValueError(
@@ -374,4 +374,4 @@ class KimiK2ToolParser(ToolParser):
 
         except Exception:
             logger.exception("Error trying to handle streaming tool call.")
-            return None  # do not stream a delta. skip this token ID.
\ No newline at end of file
+            return None  # do not stream a delta. skip this token ID.

From 7b455cf1c036d12470374d716800d0fd09290a5a Mon Sep 17 00:00:00 2001
From: elvischenv <219235043+elvischenv@users.noreply.github.com>
Date: Tue, 5 Aug 2025 10:17:18 +0800
Subject: [PATCH 208/224] [Misc] Remove pass_config from CompilationConfig
 dump_json excluded (#21911)

Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com>
---
 vllm/config.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/vllm/config.py b/vllm/config.py
index dd59526471782..1100e1077401c 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -4374,12 +4374,20 @@ class CompilationConfig:
             "disabled_custom_ops": True,
             "compilation_time": True,
             "bs_to_padded_graph_size": True,
-            "pass_config": True,
             "traced_files": True,
             "inductor_compile_config": {
                 "post_grad_custom_post_pass": True,
             },
         }
+
+        # Exclude pass_config attributes that still hold their default value
+        pass_config_exclude = {}
+        for attr, default_val in vars(PassConfig()).items():
+            if getattr(self.pass_config, attr) == default_val:
+                pass_config_exclude[attr] = True
+        if pass_config_exclude:
+            exclude["pass_config"] = pass_config_exclude
+
         # The cast to string is necessary because Pydantic is mocked in docs
         # builds and sphinx-argparse doesn't know the return type of decode()
         return str(

From 29b97c09950fb6970756f5e2cfd4a3d7c1f4d72e Mon Sep 17 00:00:00 2001
From: Ning Xie <andy.xning@gmail.com>
Date: Tue, 5 Aug 2025 10:36:20 +0800
Subject: [PATCH 209/224] [Doc] add backend to doc string of
 initialize_model_parallel (#22142)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
---
 vllm/distributed/parallel_state.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index f31e4766bfdad..48a82d30193e3 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -1013,6 +1013,7 @@ def initialize_model_parallel(
             parallelism.
         pipeline_model_parallel_size: number of GPUs used for pipeline model
             parallelism.
+        backend: name of torch distributed communication backend.
 
     Let's say we have a total of 8 GPUs denoted by g0 ... g7 and we
     use 2 GPUs to parallelize the model tensor, and 4 GPUs to parallelize

From bd3db7f46965bfc979734a6d4b50cf96184c10d8 Mon Sep 17 00:00:00 2001
From: Ning Xie <andy.xning@gmail.com>
Date: Tue, 5 Aug 2025 10:36:55 +0800
Subject: [PATCH 210/224] [Misc] log more detailed message for
 ensure_model_parallel_initialized (#22144)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
---
 vllm/distributed/parallel_state.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 48a82d30193e3..470c1355d2a91 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -1125,14 +1125,14 @@ def ensure_model_parallel_initialized(
 
     assert (
         get_tensor_model_parallel_world_size() == tensor_model_parallel_size
-    ), ("tensor parallel group already initialized, but of unexpected size: "
-        f"{get_tensor_model_parallel_world_size()=} vs. "
-        f"{tensor_model_parallel_size=}")
+    ), ("tensor parallel group already initialized, but of unexpected size. "
+        f"got: {get_tensor_model_parallel_world_size()=} vs. "
+        f"wanted: {tensor_model_parallel_size=}")
     pp_world_size = get_pp_group().world_size
     assert (pp_world_size == pipeline_model_parallel_size), (
-        "pipeline parallel group already initialized, but of unexpected size: "
-        f"{pp_world_size=} vs. "
-        f"{pipeline_model_parallel_size=}")
+        "pipeline parallel group already initialized, but of unexpected size. "
+        f"got: {pp_world_size=} vs. "
+        f"wanted: {pipeline_model_parallel_size=}")
 
 
 def prepare_communication_buffer_for_model(model: torch.nn.Module):

From 4b3e4474d73ae9cf0d6c8315570fdffd71037d08 Mon Sep 17 00:00:00 2001
From: "ZiTian.Zhao" <zitian.zhao@tencentmusic.com>
Date: Tue, 5 Aug 2025 12:43:24 +0800
Subject: [PATCH 211/224] Optimize configuration access with LRU cache in
 custom ops (#22204)

Signed-off-by: zitian zhao <zitian.zhao@tencentmusic.com>
---
 vllm/config.py                   | 10 +++++++++-
 vllm/model_executor/custom_op.py |  8 ++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 1100e1077401c..34952279c9d19 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -15,7 +15,7 @@ from collections.abc import Mapping
 from contextlib import contextmanager
 from dataclasses import (MISSING, Field, asdict, field, fields, is_dataclass,
                          replace)
-from functools import cached_property
+from functools import cached_property, lru_cache
 from importlib.util import find_spec
 from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
                     Protocol, TypeVar, Union, cast, get_args)
@@ -5123,6 +5123,14 @@ def set_current_vllm_config(vllm_config: VllmConfig,
     finally:
         _current_vllm_config = old_vllm_config
         _current_prefix = old_prefix
+        # Clear the cached compilation config now that this context has exited
+        get_cached_compilation_config.cache_clear()
+
+
+@lru_cache(maxsize=1)
+def get_cached_compilation_config():
+    """Cache config to avoid repeated calls to get_current_vllm_config()"""
+    return get_current_vllm_config().compilation_config
 
 
 def get_current_vllm_config() -> VllmConfig:
diff --git a/vllm/model_executor/custom_op.py b/vllm/model_executor/custom_op.py
index f6e79cd676f8c..6b5a107396c92 100644
--- a/vllm/model_executor/custom_op.py
+++ b/vllm/model_executor/custom_op.py
@@ -5,7 +5,7 @@ from typing import Optional
 
 import torch.nn as nn
 
-from vllm.config import get_current_vllm_config
+from vllm.config import get_cached_compilation_config
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 
@@ -86,7 +86,7 @@ class CustomOp(nn.Module):
     def dispatch_forward(self):
         # NOTE(woosuk): Here we assume that vLLM was built for only one
         # specific backend. Currently, we do not support dynamic dispatching.
-        compilation_config = get_current_vllm_config().compilation_config
+        compilation_config = get_cached_compilation_config()
         enabled = self.enabled()
         if enabled:
             compilation_config.enabled_custom_ops.update([self.__class__.name])
@@ -115,7 +115,7 @@ class CustomOp(nn.Module):
     @classmethod
     def enabled(cls) -> bool:
         # if no name, then it was not registered
-        compilation_config = get_current_vllm_config().compilation_config
+        compilation_config = get_cached_compilation_config()
         custom_ops = compilation_config.custom_ops
         if not hasattr(cls, "name"):
             logger.warning_once(
@@ -138,7 +138,7 @@ class CustomOp(nn.Module):
         Specifying 'all' or 'none' in custom_op takes precedence.
         """
         from vllm.config import CompilationLevel
-        compilation_config = get_current_vllm_config().compilation_config
+        compilation_config = get_cached_compilation_config()
         default_on = (compilation_config.level < CompilationLevel.PIECEWISE
                       or not compilation_config.use_inductor)
         count_none = compilation_config.custom_ops.count("none")

From cdfd6871a5c4f125c9b3707ec5c1260db54f4b03 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Tue, 5 Aug 2025 13:40:09 +0800
Subject: [PATCH 212/224] [Bugfix] Misaligned params in TreeAttentionImpl
 (#22226)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/v1/attention/backends/tree_attn.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/vllm/v1/attention/backends/tree_attn.py b/vllm/v1/attention/backends/tree_attn.py
index 4fb7483284053..a071f0921df94 100644
--- a/vllm/v1/attention/backends/tree_attn.py
+++ b/vllm/v1/attention/backends/tree_attn.py
@@ -4,7 +4,7 @@
 
 import ast
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Optional
 
 import torch
 
@@ -313,15 +313,11 @@ class TreeAttentionImpl(AttentionImpl):
         alibi_slopes: Optional[list[float]],
         sliding_window: Optional[int],
         kv_cache_dtype: str,
-        blocksparse_params: Optional[dict[str, Any]] = None,
         logits_soft_cap: Optional[float] = None,
         attn_type: AttentionType = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[str] = None,
         use_irope: bool = False,
     ) -> None:
-        if blocksparse_params is not None:
-            raise ValueError(
-                "TreeAttention does not support block-sparse attention.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)

From e79a12fc3afb33171b06af3f1b74a42b29d1c6c2 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 5 Aug 2025 02:54:52 -0400
Subject: [PATCH 213/224] [UX] Fail if an invalid attention backend is
 specified (#22217)

Signed-off-by: mgoin <michael@neuralmagic.com>
---
 .../attention/test_attention_selector.py      | 20 +++++--------------
 vllm/attention/selector.py                    |  4 ++++
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index 93bf20da4adba..bfeafaa9e27e6 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -278,23 +278,13 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
 
 @pytest.mark.parametrize("use_v1", [True, False])
 def test_invalid_env(use_v1: bool, monkeypatch: pytest.MonkeyPatch):
-
+    """Test that invalid attention backend names raise ValueError."""
     with monkeypatch.context() as m, patch(
             "vllm.attention.selector.current_platform", CudaPlatform()):
         m.setenv("VLLM_USE_V1", "1" if use_v1 else "0")
         m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL)
 
-        # Test with head size 32
-        backend = get_attn_backend(32, torch.float16, None, 16, False)
-        EXPECTED = "FLASH_ATTN_VLLM_V1" if use_v1 else "FLASH_ATTN"
-        assert backend.get_name() == EXPECTED
-
-        # when block size == 16, backend will fall back to XFORMERS
-        # this behavior is not yet supported on V1.
-        if use_v1:
-            # TODO: support fallback on V1!
-            # https://github.com/vllm-project/vllm/issues/14524
-            pass
-        else:
-            backend = get_attn_backend(16, torch.float16, None, 16, False)
-            assert backend.get_name() == "XFORMERS"
+        # Should raise ValueError for invalid backend
+        with pytest.raises(ValueError) as exc_info:
+            get_attn_backend(32, torch.float16, None, 16, False)
+        assert "Invalid attention backend: 'INVALID'" in str(exc_info.value)
diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py
index 2e3c8638125f7..596c556e54f06 100644
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -193,6 +193,10 @@ def _cached_get_attn_backend(
         backend_by_env_var: Optional[str] = envs.VLLM_ATTENTION_BACKEND
         if backend_by_env_var is not None:
             selected_backend = backend_name_to_enum(backend_by_env_var)
+            if selected_backend is None:
+                raise ValueError(
+                    f"Invalid attention backend: '{backend_by_env_var}'. "
+                    f"Valid backends are: {list(_Backend.__members__.keys())}")
 
     # get device-specific attn_backend
     attention_cls = current_platform.get_attn_backend_cls(

From 811ac13d039648f2d78a636ce4366e70449380c8 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Tue, 5 Aug 2025 14:54:55 +0800
Subject: [PATCH 214/224] [Core] Factor out common logic for MM budget
 calculation (#22228)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 vllm/v1/worker/gpu_model_runner.py | 217 ++++++++++++++---------------
 vllm/v1/worker/tpu_model_runner.py | 189 +++++++++++--------------
 vllm/v1/worker/utils.py            | 109 +++++++++++++++
 3 files changed, 299 insertions(+), 216 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 041687ae28b20..85976fc1c825b 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -36,7 +36,8 @@ from vllm.model_executor.models.interfaces import (is_mixture_of_experts,
 from vllm.model_executor.models.interfaces_base import (
     VllmModelForPooling, is_pooling_model, is_text_generation_model)
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
+from vllm.multimodal.inputs import (BatchedTensorInputs, MultiModalKwargs,
+                                    PlaceholderRange)
 from vllm.multimodal.utils import group_mm_inputs_by_modality
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingType
@@ -51,7 +52,6 @@ from vllm.v1.attention.backends.utils import (
     make_kv_sharing_fast_prefill_attention_metadata,
     make_local_attention_virtual_batches,
     reorder_batch_to_split_decodes_and_prefills)
-from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
 from vllm.v1.kv_cache_interface import (AttentionSpec,
                                         ChunkedLocalAttentionSpec,
                                         FullAttentionSpec, KVCacheConfig,
@@ -73,7 +73,7 @@ from vllm.v1.worker.kv_connector_model_runner_mixin import (
 from vllm.v1.worker.lora_model_runner_mixin import LoRAModelRunnerMixin
 
 from ..sample.logits_processor import LogitsProcessorManager
-from .utils import (bind_kv_cache, gather_mm_placeholders,
+from .utils import (MultiModalBudget, bind_kv_cache, gather_mm_placeholders,
                     initialize_kv_cache_for_kv_sharing,
                     sanity_check_mm_encoder_outputs, scatter_mm_placeholders)
 
@@ -148,14 +148,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         self.mm_registry = MULTIMODAL_REGISTRY
         self.uses_mrope = model_config.uses_mrope
 
-        encoder_compute_budget, encoder_cache_size = compute_encoder_budget(
-            model_config=model_config,
-            scheduler_config=scheduler_config,
-            mm_registry=self.mm_registry,
-        )
-        self.max_num_encoder_input_tokens = encoder_compute_budget
-        self.encoder_cache_size = encoder_cache_size
-
         # Sampler
         self.sampler = Sampler(logprobs_mode=self.model_config.logprobs_mode)
 
@@ -330,6 +322,14 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             self.kv_sharing_fast_prefill_logits_indices = torch.zeros(
                 self.max_num_tokens, dtype=torch.int32, device=self.device)
 
+        self.mm_budget = (MultiModalBudget(
+            self.model_config,
+            self.scheduler_config,
+            self.mm_registry,
+            max_model_len=self.max_model_len,
+            max_num_reqs=self.max_num_reqs,
+        ) if self.is_multimodal_model else None)
+
         self.reorder_batch_threshold: Optional[int] = None
 
     def _may_reorder_batch(self, scheduler_output: "SchedulerOutput") -> None:
@@ -578,37 +578,33 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         # Refresh batch metadata with any pending updates.
         self.input_batch.refresh_metadata()
 
-    def _init_model_kwargs_for_multimodal_model(
+    def _extract_mm_kwargs(
         self,
-        scheduler_output: Optional["SchedulerOutput"] = None,
-        num_reqs: int = -1,
-    ) -> dict[str, Any]:
-
-        model_kwargs: dict[str, Any] = {}
-        if self.is_multimodal_raw_input_supported:
-            # This model requires the raw multimodal data in input.
+        scheduler_output: "SchedulerOutput",
+    ) -> BatchedTensorInputs:
+        if self.is_multimodal_raw_input_supported:  # noqa: SIM102
             if scheduler_output:
-                multi_modal_kwargs_list = []
+                multi_modal_kwargs_list = list[MultiModalKwargs]()
                 for req in scheduler_output.scheduled_new_reqs:
                     req_mm_inputs = req.mm_inputs
                     if not isinstance(req_mm_inputs, list):
                         req_mm_inputs = list(req_mm_inputs)
                     multi_modal_kwargs_list.extend(req_mm_inputs)
-                multi_modal_kwargs = MultiModalKwargs.batch(
-                    multi_modal_kwargs_list)
-            else:
-                # The only case where SchedulerOutput is None is for
-                # a dummy run let's get some dummy data.
-                dummy_data = [
-                    self.mm_registry.get_decoder_dummy_data(
-                        model_config=self.model_config,
-                        seq_len=1).multi_modal_data for i in range(num_reqs)
-                ]
-                multi_modal_kwargs = MultiModalKwargs.batch(dummy_data)
 
-            model_kwargs.update(multi_modal_kwargs)
+                return MultiModalKwargs.batch(multi_modal_kwargs_list)
 
-        return model_kwargs
+        return {}
+
+    def _dummy_mm_kwargs(self, num_seqs: int) -> BatchedTensorInputs:
+        if self.is_multimodal_raw_input_supported:
+            mm_budget = self.mm_budget
+            assert mm_budget is not None
+
+            dummy_modality, _ = mm_budget.get_modality_with_max_tokens()
+
+            return self._get_mm_dummy_batch(dummy_modality, num_seqs)
+
+        return {}
 
     def _get_cumsum_and_arange(
         self,
@@ -1517,19 +1513,18 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             # NOTE(woosuk): To unify token ids and soft tokens (vision
             # embeddings), we always use embeddings (rather than token ids)
             # as input to the multimodal model, even when the input is text.
-            input_ids = self.input_ids[:num_scheduled_tokens]
-
-            model_kwargs = self._init_model_kwargs_for_multimodal_model(
-                scheduler_output=scheduler_output)
-            inputs_embeds = self.model.get_input_embeddings(
-                input_ids=input_ids,
+            inputs_embeds_scheduled = self.model.get_input_embeddings(
+                input_ids=self.input_ids[:num_scheduled_tokens],
                 multimodal_embeddings=mm_embeds or None,
             )
 
             # TODO(woosuk): Avoid the copy. Optimize.
-            self.inputs_embeds[:num_scheduled_tokens].copy_(inputs_embeds)
-            inputs_embeds = self.inputs_embeds[:num_input_tokens]
+            self.inputs_embeds[:num_scheduled_tokens].copy_(
+                inputs_embeds_scheduled)
+
             input_ids = None
+            inputs_embeds = self.inputs_embeds[:num_input_tokens]
+            model_mm_kwargs = self._extract_mm_kwargs(scheduler_output)
         else:
             # For text-only models, we use token ids as input.
             # While it is possible to use embeddings as input just like the
@@ -1537,7 +1532,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             # then the embedding layer is not included in the CUDA graph.
             input_ids = self.input_ids[:num_input_tokens]
             inputs_embeds = None
-            model_kwargs = {}
+            model_mm_kwargs = {}
         if self.uses_mrope:
             positions = self.mrope_positions[:, :num_input_tokens]
         else:
@@ -1571,7 +1566,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 intermediate_tensors=intermediate_tensors,
                 inputs_embeds=inputs_embeds,
                 **MultiModalKwargs.as_kwargs(
-                    model_kwargs,
+                    model_mm_kwargs,
                     device=self.device,
                 ),
             )
@@ -2149,6 +2144,30 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             yield
             input_ids.fill_(0)
 
+    def _get_mm_dummy_batch(
+        self,
+        modality: str,
+        max_items_per_batch: int,
+    ) -> BatchedTensorInputs:
+        """Dummy data for profiling and precompiling multimodal models."""
+        dummy_decoder_data = self.mm_registry.get_decoder_dummy_data(
+            model_config=self.model_config,
+            seq_len=self.max_num_tokens,
+            mm_counts={modality: 1},
+        )
+        dummy_mm_data = dummy_decoder_data.multi_modal_data
+
+        # This results in the maximum GPU consumption of the model
+        dummy_mm_item = dummy_mm_data.get_item(modality=modality, item_index=0)
+        dummy_mm_kwargs = MultiModalKwargs.from_items([dummy_mm_item])
+
+        batched_dummy_mm_inputs = MultiModalKwargs.batch([dummy_mm_kwargs] *
+                                                         max_items_per_batch)
+        return MultiModalKwargs.as_kwargs(
+            batched_dummy_mm_inputs,
+            device=self.device,
+        )
+
     @torch.inference_mode()
     def _dummy_run(
         self,
@@ -2213,16 +2232,14 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
 
         with self.maybe_dummy_run_with_lora(self.lora_config,
                                             num_scheduled_tokens):
-            model = self.model
             if self.is_multimodal_model:
-                model_kwargs = self._init_model_kwargs_for_multimodal_model(
-                    num_reqs=num_reqs)
                 input_ids = None
                 inputs_embeds = self.inputs_embeds[:num_tokens]
+                model_mm_kwargs = self._dummy_mm_kwargs(num_reqs)
             else:
                 input_ids = self.input_ids[:num_tokens]
                 inputs_embeds = None
-                model_kwargs = {}
+                model_mm_kwargs = {}
 
             if self.uses_mrope:
                 positions = self.mrope_positions[:, :num_tokens]
@@ -2247,13 +2264,13 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                     self.vllm_config,
                     num_tokens=num_tokens,
                     num_tokens_across_dp=num_tokens_across_dp):
-                outputs = model(
+                outputs = self.model(
                     input_ids=input_ids,
                     positions=positions,
                     intermediate_tensors=intermediate_tensors,
                     inputs_embeds=inputs_embeds,
                     **MultiModalKwargs.as_kwargs(
-                        model_kwargs,
+                        model_mm_kwargs,
                         device=self.device,
                     ),
                 )
@@ -2423,75 +2440,51 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
 
     def profile_run(self) -> None:
         # Profile with multimodal encoder & encoder cache.
-        # TODO: handle encoder-decoder models once we support them.
-        if (self.is_multimodal_model and self.max_num_encoder_input_tokens > 0
-                and self.encoder_cache_size > 0):
+        if self.is_multimodal_model:
+            mm_budget = self.mm_budget
+            assert mm_budget is not None
 
-            # NOTE: Currently model is profiled with a single non-text
-            # modality with the max possible input tokens even when
-            # it supports multiple.
-            max_tokens_by_modality_dict = self.mm_registry \
-                .get_max_tokens_per_item_by_nonzero_modality(self.model_config)
-            dummy_data_modality, max_tokens_per_mm_item = max(
-                max_tokens_by_modality_dict.items(), key=lambda item: item[1])
+            # TODO: handle encoder-decoder models once we support them.
+            if (encoder_budget := mm_budget.get_encoder_budget()) > 0:
+                # NOTE: Currently model is profiled with a single non-text
+                # modality with the max possible input tokens even when
+                # it supports multiple.
+                (
+                    dummy_modality,
+                    max_tokens,
+                ) = mm_budget.get_modality_with_max_tokens()
+                (
+                    max_mm_items_per_prompt,
+                    max_mm_items_per_batch,
+                ) = mm_budget.get_max_items(dummy_modality, max_tokens)
 
-            # Check how many items of this modality can be supported by
-            # the encoder budget.
-            encoder_budget = min(self.max_num_encoder_input_tokens,
-                                 self.encoder_cache_size)
+                logger.info(
+                    "Encoder cache will be initialized with a budget of "
+                    "%s tokens, and profiled with %s %s items of the maximum "
+                    "feature size.",
+                    encoder_budget,
+                    max_mm_items_per_batch,
+                    dummy_modality,
+                )
 
-            max_num_mm_items_encoder_budget = encoder_budget // \
-                max_tokens_per_mm_item
+                # Create dummy batch of multimodal inputs.
+                batched_dummy_mm_inputs = self._get_mm_dummy_batch(
+                    dummy_modality,
+                    max_mm_items_per_batch,
+                )
 
-            # Check how many items of this modality can be supported by
-            # the decoder budget.
-            max_mm_items_per_req = self.mm_registry.get_mm_limits_per_prompt(
-                self.model_config)[dummy_data_modality]
+                # Run multimodal encoder.
+                dummy_encoder_outputs = self.model.get_multimodal_embeddings(
+                    **batched_dummy_mm_inputs)
 
-            # NOTE: We do not consider max_num_batched_tokens on purpose
-            # because the multimodal embeddings can be generated in advance
-            # and chunked prefilled.
-            max_num_mm_items_decoder_budget = self.max_num_reqs * \
-                max_mm_items_per_req
+                sanity_check_mm_encoder_outputs(
+                    dummy_encoder_outputs,
+                    expected_num_items=max_mm_items_per_batch,
+                )
 
-            max_num_mm_items = max(
-                1,
-                min(max_num_mm_items_encoder_budget,
-                    max_num_mm_items_decoder_budget))
-
-            logger.info(
-                "Encoder cache will be initialized with a budget of %s tokens,"
-                " and profiled with %s %s items of the maximum feature size.",
-                encoder_budget, max_num_mm_items, dummy_data_modality)
-
-            # Create dummy batch of multimodal inputs.
-            dummy_mm_kwargs = self.mm_registry.get_decoder_dummy_data(
-                model_config=self.model_config,
-                seq_len=max_tokens_per_mm_item,
-                mm_counts={
-                    dummy_data_modality: 1
-                },
-            ).multi_modal_data
-
-            batched_dummy_mm_inputs = MultiModalKwargs.batch(
-                [dummy_mm_kwargs] * max_num_mm_items,
-                pin_memory=self.pin_memory)
-            batched_dummy_mm_inputs = MultiModalKwargs.as_kwargs(
-                batched_dummy_mm_inputs,
-                device=self.device,
-            )
-
-            # Run multimodal encoder.
-            dummy_encoder_outputs = self.model.get_multimodal_embeddings(
-                **batched_dummy_mm_inputs)
-
-            sanity_check_mm_encoder_outputs(
-                dummy_encoder_outputs,
-                expected_num_items=max_num_mm_items,
-            )
-
-            # Cache the dummy encoder outputs.
-            self.encoder_cache["tmp"] = dict(enumerate(dummy_encoder_outputs))
+                # Cache the dummy encoder outputs.
+                self.encoder_cache["tmp"] = dict(
+                    enumerate(dummy_encoder_outputs))
 
         # Add `is_profile` here to pre-allocate communication buffers
         hidden_states, last_hidden_states \
diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py
index 67cb2f9dd810e..5f3188efdb244 100644
--- a/vllm/v1/worker/tpu_model_runner.py
+++ b/vllm/v1/worker/tpu_model_runner.py
@@ -42,7 +42,6 @@ from vllm.v1.attention.backends.pallas import (TPU_STR_DTYPE_TO_TORCH_DTYPE,
                                                PallasAttentionBackend,
                                                PallasMetadata,
                                                get_page_size_bytes)
-from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
 from vllm.v1.kv_cache_interface import (AttentionSpec, FullAttentionSpec,
                                         KVCacheConfig, KVCacheSpec,
                                         SlidingWindowSpec)
@@ -55,7 +54,8 @@ from vllm.v1.worker.kv_connector_model_runner_mixin import (
 from vllm.v1.worker.lora_model_runner_mixin import LoRAModelRunnerMixin
 from vllm.v1.worker.tpu_input_batch import CachedRequestState, InputBatch
 
-from .utils import (bind_kv_cache, initialize_kv_cache_for_kv_sharing,
+from .utils import (MultiModalBudget, bind_kv_cache,
+                    initialize_kv_cache_for_kv_sharing,
                     sanity_check_mm_encoder_outputs)
 
 if TYPE_CHECKING:
@@ -195,14 +195,6 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         # TODO: Support M-RoPE (e.g, Qwen2-VL)
         assert not self.uses_mrope, "TPU does not support M-RoPE yet."
 
-        encoder_compute_budget, encoder_cache_size = compute_encoder_budget(
-            model_config=model_config,
-            scheduler_config=scheduler_config,
-            mm_registry=self.mm_registry,
-        )
-        self.max_num_encoder_input_tokens = encoder_compute_budget
-        self.encoder_cache_size = encoder_cache_size
-
         self._num_slices_per_kv_cache_update_block = \
             _get_num_slices_per_kv_cache_update_block(get_page_size_bytes(
                 block_size=self.block_size,
@@ -294,36 +286,13 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         self.structured_decode_arange = torch.arange(
             0, 32, device="cpu", pin_memory=self.pin_memory)
 
-        # Get maximum number of mm items per modality (batch size).
-        self.max_num_mm_items_by_modality = dict()
-        if (self.is_multimodal_model and self.max_num_encoder_input_tokens > 0
-                and self.encoder_cache_size > 0):
-            max_tokens_by_modality_dict = (
-                MULTIMODAL_REGISTRY.
-                get_max_tokens_per_item_by_nonzero_modality(self.model_config))
-            for modality, max_tokens in max_tokens_by_modality_dict.items():
-                # Check how many items of this modality can be supported by
-                # the encoder budget.
-                encoder_budget = min(self.max_num_encoder_input_tokens,
-                                     self.encoder_cache_size)
-
-                max_num_mm_items_encoder_budget = cdiv(encoder_budget,
-                                                       max_tokens)
-
-                # Check how many items of this modality can be supported by
-                # the decoder budget.
-                max_mm_items_per_req = self.mm_registry.\
-                    get_mm_limits_per_prompt(self.model_config)[modality]
-
-                # NOTE: We do not consider max_num_batched_tokens on purpose
-                # because the multimodal embeddings can be generated in advance
-                # and chunked prefilled.
-                max_num_mm_items_decoder_budget = self.max_num_reqs * \
-                    max_mm_items_per_req
-
-                max_num_mm_items = min(max_num_mm_items_encoder_budget,
-                                       max_num_mm_items_decoder_budget)
-                self.max_num_mm_items_by_modality[modality] = max_num_mm_items
+        self.mm_budget = (MultiModalBudget(
+            self.model_config,
+            self.scheduler_config,
+            self.mm_registry,
+            max_model_len=self.max_model_len,
+            max_num_reqs=self.max_num_reqs,
+        ) if self.is_multimodal_model else None)
 
         if not self.use_spmd:
             self.sample_from_logits_func = torch.compile(
@@ -1335,23 +1304,33 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         xm.mark_step()  # Captures metadata updates
 
     def _precompile_mm_encoder(self) -> None:
+        if not self.is_multimodal_model:
+            return
+
         # Pre-compile MM encoder for all supported data modalities.
         hf_config = self.vllm_config.model_config.hf_config
-        for mode, max_items_by_mode in \
-            self.max_num_mm_items_by_modality.items():
+
+        mm_budget = self.mm_budget
+        assert mm_budget is not None
+
+        max_items_per_seq_by_modality = mm_budget.max_items_per_batch_by_modality  # noqa: E501
+
+        for mode, max_items_per_seq in max_items_per_seq_by_modality.items():
             logger.info(
                 "Compiling Multimodal %s Encoder with different input"
                 " shapes.", mode)
             start = time.perf_counter()
             # No padding for MM encoder just yet.
-            for num_items in range(1, max_items_by_mode + 1):
+            for num_items in range(1, max_items_per_seq + 1):
                 logger.info("  -- mode: %s items: %d", mode, num_items)
                 batched_dummy_mm_inputs = self._get_mm_dummy_batch(
-                    mode, num_items)
+                    mode,
+                    num_items,
+                )
                 # Run multimodal encoder.
                 xm.mark_step()
-                mm_embeds = self.model.\
-                    get_multimodal_embeddings(**batched_dummy_mm_inputs)
+                mm_embeds = self.model.get_multimodal_embeddings(
+                    **batched_dummy_mm_inputs)
                 xm.mark_step()
                 num_patches = mm_embeds[0].shape[0]
                 items_size = num_patches * num_items
@@ -1547,51 +1526,61 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         num_tokens: int,
     ) -> None:
         # Profile with multimodal encoder & encoder cache.
-        # TODO: handle encoder-decoder models once we support them.
-        if (self.is_multimodal_model and self.max_num_encoder_input_tokens > 0
-                and self.encoder_cache_size > 0):
+        if self.is_multimodal_model:
+            mm_budget = self.mm_budget
+            assert mm_budget is not None
 
-            # NOTE: Currently model is profiled with a single non-text
-            # modality with the max possible input tokens even when
-            # it supports multiple.
-            dummy_data_modality, max_num_mm_items = max(
-                self.max_num_mm_items_by_modality.items(), key=lambda t: t[1])
+            # TODO: handle encoder-decoder models once we support them.
+            if (encoder_budget := mm_budget.get_encoder_budget()) > 0:
+                # NOTE: Currently model is profiled with a single non-text
+                # modality with the max possible input tokens even when
+                # it supports multiple.
+                (
+                    dummy_modality,
+                    max_tokens,
+                ) = mm_budget.get_modality_with_max_tokens()
+                (
+                    max_mm_items_per_prompt,
+                    max_mm_items_per_batch,
+                ) = mm_budget.get_max_items(dummy_modality, max_tokens)
 
-            encoder_budget = min(self.max_num_encoder_input_tokens,
-                                 self.encoder_cache_size)
+                logger.info(
+                    "Encoder cache will be initialized with a budget of "
+                    "%s tokens, and profiled with %s %s items of the maximum "
+                    "feature size.",
+                    encoder_budget,
+                    max_mm_items_per_batch,
+                    dummy_modality,
+                )
 
-            logger.info(
-                "Encoder cache will be initialized with a budget of %d tokens,"
-                " and profiled with %s %s items of the maximum feature size.",
-                encoder_budget, max_num_mm_items, dummy_data_modality)
+                # Create dummy batch of multimodal inputs.
+                batched_dummy_mm_inputs = self._get_mm_dummy_batch(
+                    dummy_modality,
+                    max_mm_items_per_batch,
+                )
 
-            # Create dummy batch of multimodal inputs.
-            batched_dummy_mm_inputs = self._get_mm_dummy_batch(
-                dummy_data_modality, max_num_mm_items)
+                # Run multimodal encoder.
+                # Isolate encoder graph from post-processing to minimize
+                # impact of recompilation until it's fixed.
+                start = time.perf_counter()
+                xm.mark_step()
+                dummy_encoder_outputs = self.model.get_multimodal_embeddings(
+                    **batched_dummy_mm_inputs)
+                xm.mark_step()
+                xm.wait_device_ops()
+                end = time.perf_counter()
+                logger.info(
+                    "Multimodal Encoder profiling finished in in %.2f [secs].",
+                    end - start)
 
-            # Run multimodal encoder.
-            # Isolate encoder graph from post-processing to minimize
-            # impact of recompilation until it's fixed.
-            start = time.perf_counter()
-            xm.mark_step()
-            dummy_encoder_outputs = self.model.get_multimodal_embeddings(
-                **batched_dummy_mm_inputs)
-            xm.mark_step()
-            xm.wait_device_ops()
-            end = time.perf_counter()
-            logger.info(
-                "Multimodal Encoder profiling finished in in %.2f [secs].",
-                end - start)
+                sanity_check_mm_encoder_outputs(
+                    dummy_encoder_outputs,
+                    expected_num_items=max_mm_items_per_batch,
+                )
 
-            assert len(dummy_encoder_outputs) == max_num_mm_items, (
-                "Expected dimension 0 of encoder outputs to match the number "
-                f"of multimodal data items: {max_num_mm_items}, got "
-                f"{len(dummy_encoder_outputs)=} instead. This is most likely "
-                "due to the 'get_multimodal_embeddings' method of the model "
-                "not implemented correctly.")
-
-            # Cache the dummy encoder outputs.
-            self.encoder_cache["tmp"] = dict(enumerate(dummy_encoder_outputs))
+                # Cache the dummy encoder outputs.
+                self.encoder_cache["tmp"] = dict(
+                    enumerate(dummy_encoder_outputs))
 
         # Trigger compilation for general shape.
         self._dummy_run(num_tokens, self.num_reqs_max_model_len,
@@ -1809,33 +1798,25 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             self.grammar_bitmask_cpu[:num_reqs].to(logits.device), \
             self.structured_decode_arange.to(logits.device)
 
-    def _get_mm_dummy_batch(self, modality: str,
-                            batch_size: int) -> BatchedTensorInputs:
-        # Dummy data for pre-compiling multimodal models.
-        dummy_request_data = self.mm_registry.get_decoder_dummy_data(
+    def _get_mm_dummy_batch(
+        self,
+        modality: str,
+        max_items_per_batch: int,
+    ) -> BatchedTensorInputs:
+        """Dummy data for profiling and precompiling multimodal models."""
+        dummy_decoder_data = self.mm_registry.get_decoder_dummy_data(
             model_config=self.model_config,
             seq_len=self.max_num_tokens,
+            mm_counts={modality: 1},
         )
-        dummy_mm_data = dummy_request_data.multi_modal_data
+        dummy_mm_data = dummy_decoder_data.multi_modal_data
 
-        # Dummy data definition in V0 may contain multiple multimodal items
-        # (e.g, multiple images) for a single request, therefore here we
-        # always replicate first item by max_num_mm_items times since in V1
-        # they are scheduled to be processed separately.
-        assert isinstance(dummy_mm_data, MultiModalKwargs), (
-            "Expected dummy multimodal data to be of type "
-            f"MultiModalKwargs, got {type(dummy_mm_data)=} instead. "
-            "This is most likely due to the model not having a merged "
-            "processor.")
-
-        # When models have a merged processor, their dummy data is
-        # already batched `MultiModalKwargs`, therefore we take the first
-        # `MultiModalKwargsItem` from the desired modality to profile on.
+        # This results in the maximum TPU consumption of the model
         dummy_mm_item = dummy_mm_data.get_item(modality=modality, item_index=0)
         dummy_mm_kwargs = MultiModalKwargs.from_items([dummy_mm_item])
 
         batched_dummy_mm_inputs = MultiModalKwargs.batch([dummy_mm_kwargs] *
-                                                         batch_size)
+                                                         max_items_per_batch)
         return MultiModalKwargs.as_kwargs(
             batched_dummy_mm_inputs,
             device=self.device,
diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py
index 3ecb1d7dd6560..6761b3c5e41db 100644
--- a/vllm/v1/worker/utils.py
+++ b/vllm/v1/worker/utils.py
@@ -5,14 +5,123 @@ from typing import TYPE_CHECKING, Optional
 
 import torch
 
+from vllm.config import ModelConfig, SchedulerConfig
 from vllm.model_executor.models.interfaces import MultiModalEmbeddings
 from vllm.model_executor.models.utils import extract_layer_index
+from vllm.multimodal.registry import MultiModalRegistry
+from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
 from vllm.v1.kv_cache_interface import KVCacheGroupSpec
 
 if TYPE_CHECKING:
     from vllm.attention.layer import Attention
 
 
+class MultiModalBudget:
+    """Helper class to calculate budget information for multi-modal models."""
+
+    def __init__(
+        self,
+        model_config: ModelConfig,
+        scheduler_config: SchedulerConfig,
+        mm_registry: MultiModalRegistry,
+        *,
+        max_model_len: int,
+        max_num_reqs: int,
+    ) -> None:
+        super().__init__()
+
+        self.model_config = model_config
+        self.scheduler_config = scheduler_config
+        self.mm_registry = mm_registry
+
+        encoder_compute_budget, encoder_cache_size = compute_encoder_budget(
+            model_config=model_config,
+            scheduler_config=scheduler_config,
+            mm_registry=mm_registry,
+        )
+
+        self.max_num_encoder_input_tokens = encoder_compute_budget
+        self.encoder_cache_size = encoder_cache_size
+        self.max_model_len = max_model_len
+        self.max_num_reqs = max_num_reqs
+
+        self.mm_limits = mm_registry.get_mm_limits_per_prompt(model_config)
+
+        max_items_per_prompt_by_modality = dict[str, int]()
+        max_items_per_batch_by_modality = dict[str, int]()
+
+        max_tokens_by_modality = mm_registry \
+            .get_max_tokens_per_item_by_nonzero_modality(model_config)
+
+        for modality, max_tokens in max_tokens_by_modality.items():
+            (
+                max_items_per_prompt,
+                max_items_per_batch,
+            ) = self.get_max_items(modality, max_tokens)
+
+            max_items_per_prompt_by_modality[modality] = max_items_per_prompt
+            max_items_per_batch_by_modality[modality] = max_items_per_batch
+
+        self.max_tokens_by_modality = max_tokens_by_modality
+        self.max_items_per_prompt_by_modality = max_items_per_prompt_by_modality
+        self.max_items_per_batch_by_modality = max_items_per_batch_by_modality
+
+    def get_modality_with_max_tokens(self) -> tuple[str, int]:
+        max_tokens_by_modality = self.max_tokens_by_modality
+        modality, max_tokens = max(max_tokens_by_modality.items(),
+                                   key=lambda item: item[1])
+
+        return modality, max_tokens
+
+    def get_encoder_budget(self) -> int:
+        return min(self.max_num_encoder_input_tokens, self.encoder_cache_size)
+
+    def get_max_items(
+        self,
+        modality: str,
+        max_tokens_per_item: int,
+    ) -> tuple[int, int]:
+        if max_tokens_per_item == 0:
+            return 0, 0
+
+        # Check how many items of this modality can be supported by
+        # the encoder budget.
+        encoder_budget = self.get_encoder_budget()
+
+        # TODO: handle encoder-decoder models once we support them.
+        if encoder_budget == 0:
+            return 0, 0
+
+        max_encoder_items_per_batch = encoder_budget // max_tokens_per_item
+
+        # Check how many items of this modality can be supported by
+        # the decoder budget.
+        mm_limit = self.mm_limits[modality]
+
+        max_items_per_prompt = max(
+            1,
+            min(mm_limit, self.max_model_len // max_tokens_per_item),
+        )
+
+        scheduler_config = self.scheduler_config
+        max_num_reqs = self.max_num_reqs
+
+        if not scheduler_config.enable_chunked_prefill:
+            max_num_reqs = min(
+                max_num_reqs,
+                scheduler_config.max_num_batched_tokens // max_tokens_per_item,
+            )
+
+        max_decoder_items_per_batch = max_num_reqs * max_items_per_prompt
+
+        max_items_per_batch = max(
+            1,
+            min(max_encoder_items_per_batch, max_decoder_items_per_batch),
+        )
+
+        return max_items_per_prompt, max_items_per_batch
+
+
 def sanity_check_mm_encoder_outputs(
     mm_embeddings: MultiModalEmbeddings,
     expected_num_items: int,

From 586f286789a09f5616be74ee8bedde0a9f698a72 Mon Sep 17 00:00:00 2001
From: "wang.yuqi" <noooop@126.com>
Date: Tue, 5 Aug 2025 15:37:00 +0800
Subject: [PATCH 215/224] [Model] Pooling model activation supports per request
 control by PoolingParams (#20538)

Signed-off-by: wang.yuqi <noooop@126.com>
---
 tests/entrypoints/llm/test_classify.py        |  67 ++++++
 tests/entrypoints/llm/test_embedding.py       |  56 +++++
 tests/entrypoints/llm/test_reward.py          |  66 ++++++
 tests/entrypoints/llm/test_score.py           |  69 ++++++
 .../entrypoints/openai/test_classification.py |  31 +++
 tests/entrypoints/openai/test_embedding.py    |  34 +++
 tests/entrypoints/openai/test_rerank.py       |  38 +++
 tests/entrypoints/openai/test_score.py        |  41 ++++
 .../pooling/test_override_pooler_config.py    | 127 ++++++++++
 tests/models/language/pooling/test_reward.py  |   4 +-
 tests/models/utils.py                         |   7 +
 tests/test_pooling_params.py                  | 106 +++++++++
 vllm/config.py                                |  30 +--
 vllm/entrypoints/llm.py                       |  22 +-
 vllm/entrypoints/openai/protocol.py           |  20 +-
 vllm/model_executor/layers/pooler.py          | 222 +++++++++---------
 vllm/model_executor/models/config.py          |  32 +++
 vllm/model_executor/models/jamba.py           |   2 -
 vllm/model_executor/models/jina_vl.py         |   5 +-
 vllm/model_executor/models/qwen2_rm.py        |   3 -
 vllm/pooling_params.py                        | 139 +++++++++--
 21 files changed, 948 insertions(+), 173 deletions(-)
 create mode 100644 tests/entrypoints/llm/test_classify.py
 create mode 100644 tests/entrypoints/llm/test_embedding.py
 create mode 100644 tests/entrypoints/llm/test_reward.py
 create mode 100644 tests/entrypoints/llm/test_score.py
 create mode 100644 tests/models/language/pooling/test_override_pooler_config.py
 create mode 100644 tests/test_pooling_params.py

diff --git a/tests/entrypoints/llm/test_classify.py b/tests/entrypoints/llm/test_classify.py
new file mode 100644
index 0000000000000..abdce8935ea58
--- /dev/null
+++ b/tests/entrypoints/llm/test_classify.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import weakref
+
+import pytest
+import torch
+
+from vllm import LLM, PoolingParams
+from vllm.distributed import cleanup_dist_env_and_memory
+
+from ...models.utils import softmax
+
+MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
+
+prompts = ["The chef prepared a delicious meal."]
+
+
+@pytest.fixture(autouse=True)
+def v1(run_with_both_engines):
+    # Simple autouse wrapper to run both engines for each test
+    # This can be promoted up to conftest.py to run for every
+    # test in a package
+    pass
+
+
+@pytest.fixture(scope="module")
+def llm():
+    # pytest caches the fixture so we use weakref.proxy to
+    # enable garbage collection
+    llm = LLM(model=MODEL_NAME,
+              max_num_batched_tokens=32768,
+              tensor_parallel_size=1,
+              gpu_memory_utilization=0.75,
+              enforce_eager=True,
+              seed=0)
+
+    with llm.deprecate_legacy_api():
+        yield weakref.proxy(llm)
+
+        del llm
+
+    cleanup_dist_env_and_memory()
+
+
+@pytest.mark.skip_global_cleanup
+def test_pooling_params(llm: LLM):
+
+    def get_outputs(activation):
+        outputs = llm.classify(
+            prompts,
+            pooling_params=PoolingParams(activation=activation),
+            use_tqdm=False)
+        return torch.tensor([x.outputs.probs for x in outputs])
+
+    default = get_outputs(activation=None)
+    w_activation = get_outputs(activation=True)
+    wo_activation = get_outputs(activation=False)
+
+    assert torch.allclose(default, w_activation,
+                          atol=1e-2), "Default should use activation."
+    assert not torch.allclose(
+        w_activation, wo_activation,
+        atol=1e-2), "wo_activation should not use activation."
+    assert torch.allclose(
+        softmax(wo_activation), w_activation, atol=1e-2
+    ), "w_activation should be close to activation(wo_activation)."
diff --git a/tests/entrypoints/llm/test_embedding.py b/tests/entrypoints/llm/test_embedding.py
new file mode 100644
index 0000000000000..ba20d7b9548ef
--- /dev/null
+++ b/tests/entrypoints/llm/test_embedding.py
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import weakref
+
+import pytest
+import torch
+import torch.nn.functional as F
+
+from vllm import LLM, PoolingParams
+from vllm.distributed import cleanup_dist_env_and_memory
+
+MODEL_NAME = "intfloat/multilingual-e5-small"
+
+prompts = ["The chef prepared a delicious meal."]
+
+
+@pytest.fixture(scope="module")
+def llm():
+    # pytest caches the fixture so we use weakref.proxy to
+    # enable garbage collection
+    llm = LLM(model=MODEL_NAME,
+              max_num_batched_tokens=32768,
+              tensor_parallel_size=1,
+              gpu_memory_utilization=0.75,
+              enforce_eager=True,
+              seed=0)
+
+    with llm.deprecate_legacy_api():
+        yield weakref.proxy(llm)
+
+        del llm
+
+    cleanup_dist_env_and_memory()
+
+
+@pytest.mark.skip_global_cleanup
+def test_pooling_params(llm: LLM):
+
+    def get_outputs(normalize):
+        outputs = llm.embed(prompts,
+                            pooling_params=PoolingParams(normalize=normalize),
+                            use_tqdm=False)
+        return torch.tensor([x.outputs.embedding for x in outputs])
+
+    default = get_outputs(normalize=None)
+    w_normal = get_outputs(normalize=True)
+    wo_normal = get_outputs(normalize=False)
+
+    assert torch.allclose(default, w_normal,
+                          atol=1e-2), "Default should use normal."
+    assert not torch.allclose(w_normal, wo_normal,
+                              atol=1e-2), "wo_normal should not use normal."
+    assert torch.allclose(
+        w_normal, F.normalize(wo_normal, p=2, dim=-1),
+        atol=1e-2), "w_normal should be close to normal(wo_normal)."
diff --git a/tests/entrypoints/llm/test_reward.py b/tests/entrypoints/llm/test_reward.py
new file mode 100644
index 0000000000000..361e2d0e1047f
--- /dev/null
+++ b/tests/entrypoints/llm/test_reward.py
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import weakref
+
+import pytest
+import torch
+
+from vllm import LLM, PoolingParams
+from vllm.distributed import cleanup_dist_env_and_memory
+
+from ...models.utils import softmax
+
+MODEL_NAME = "internlm/internlm2-1_8b-reward"
+
+prompts = ["The chef prepared a delicious meal."]
+
+
+@pytest.fixture(autouse=True)
+def v1(run_with_both_engines):
+    # Simple autouse wrapper to run both engines for each test
+    # This can be promoted up to conftest.py to run for every
+    # test in a package
+    pass
+
+
+@pytest.fixture(scope="module")
+def llm():
+    # pytest caches the fixture so we use weakref.proxy to
+    # enable garbage collection
+    llm = LLM(model=MODEL_NAME,
+              max_num_batched_tokens=32768,
+              tensor_parallel_size=1,
+              gpu_memory_utilization=0.75,
+              enforce_eager=True,
+              trust_remote_code=True,
+              seed=0)
+
+    with llm.deprecate_legacy_api():
+        yield weakref.proxy(llm)
+
+        del llm
+
+    cleanup_dist_env_and_memory()
+
+
+@pytest.mark.skip_global_cleanup
+def test_pooling_params(llm: LLM):
+
+    def get_outputs(softmax):
+        outputs = llm.reward(prompts,
+                             pooling_params=PoolingParams(softmax=softmax),
+                             use_tqdm=False)
+        return torch.cat([x.outputs.data for x in outputs])
+
+    default = get_outputs(softmax=None)
+    w_softmax = get_outputs(softmax=True)
+    wo_softmax = get_outputs(softmax=False)
+
+    assert torch.allclose(default, w_softmax,
+                          atol=1e-2), "Default should use softmax."
+    assert not torch.allclose(w_softmax, wo_softmax,
+                              atol=1e-2), "wo_softmax should not use softmax."
+    assert torch.allclose(
+        softmax(wo_softmax), w_softmax,
+        atol=1e-2), "w_softmax should be close to softmax(wo_softmax)."
diff --git a/tests/entrypoints/llm/test_score.py b/tests/entrypoints/llm/test_score.py
new file mode 100644
index 0000000000000..dd4eae0ccc06e
--- /dev/null
+++ b/tests/entrypoints/llm/test_score.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import weakref
+
+import pytest
+import torch
+
+from vllm import LLM, PoolingParams
+from vllm.distributed import cleanup_dist_env_and_memory
+
+from ...models.utils import softmax
+
+MODEL_NAME = "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
+
+
+@pytest.fixture(autouse=True)
+def v1(run_with_both_engines):
+    # Simple autouse wrapper to run both engines for each test
+    # This can be promoted up to conftest.py to run for every
+    # test in a package
+    pass
+
+
+@pytest.fixture(scope="module")
+def llm():
+    # pytest caches the fixture so we use weakref.proxy to
+    # enable garbage collection
+    llm = LLM(model=MODEL_NAME,
+              max_num_batched_tokens=32768,
+              tensor_parallel_size=1,
+              gpu_memory_utilization=0.75,
+              enforce_eager=True,
+              seed=0)
+
+    with llm.deprecate_legacy_api():
+        yield weakref.proxy(llm)
+
+        del llm
+
+    cleanup_dist_env_and_memory()
+
+
+@pytest.mark.skip_global_cleanup
+def test_pooling_params(llm: LLM):
+
+    def get_outputs(activation):
+        text_1 = "What is the capital of France?"
+        text_2 = "The capital of France is Paris."
+
+        outputs = llm.score(
+            text_1,
+            text_2,
+            pooling_params=PoolingParams(activation=activation),
+            use_tqdm=False)
+        return torch.tensor([x.outputs.score for x in outputs])
+
+    default = get_outputs(activation=None)
+    w_activation = get_outputs(activation=True)
+    wo_activation = get_outputs(activation=False)
+
+    assert torch.allclose(default, w_activation,
+                          atol=1e-2), "Default should use activation."
+    assert not torch.allclose(
+        w_activation, wo_activation,
+        atol=1e-2), "wo_activation should not use activation."
+    assert torch.allclose(
+        softmax(wo_activation), w_activation, atol=1e-2
+    ), "w_activation should be close to activation(wo_activation)."
diff --git a/tests/entrypoints/openai/test_classification.py b/tests/entrypoints/openai/test_classification.py
index b2472658ca81c..bcf127307f730 100644
--- a/tests/entrypoints/openai/test_classification.py
+++ b/tests/entrypoints/openai/test_classification.py
@@ -3,6 +3,8 @@
 
 import pytest
 import requests
+import torch
+import torch.nn.functional as F
 
 from vllm.entrypoints.openai.protocol import ClassificationResponse
 
@@ -181,3 +183,32 @@ async def test_invocations(server: RemoteOpenAIServer):
         assert classification_data.keys() == invocation_data.keys()
         assert classification_data["probs"] == pytest.approx(
             invocation_data["probs"], rel=0.01)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_activation(server: RemoteOpenAIServer, model_name: str):
+    input_text = ["This product was excellent and exceeded my expectations"]
+
+    async def get_outputs(activation):
+        response = requests.post(server.url_for("classify"),
+                                 json={
+                                     "model": model_name,
+                                     "input": input_text,
+                                     "activation": activation
+                                 })
+        outputs = response.json()
+        return torch.tensor([x['probs'] for x in outputs["data"]])
+
+    default = await get_outputs(activation=None)
+    w_activation = await get_outputs(activation=True)
+    wo_activation = await get_outputs(activation=False)
+
+    assert torch.allclose(default, w_activation,
+                          atol=1e-2), "Default should use activation."
+    assert not torch.allclose(
+        w_activation, wo_activation,
+        atol=1e-2), "wo_activation should not use activation."
+    assert torch.allclose(
+        F.softmax(wo_activation, dim=-1), w_activation, atol=1e-2
+    ), "w_activation should be close to activation(wo_activation)."
diff --git a/tests/entrypoints/openai/test_embedding.py b/tests/entrypoints/openai/test_embedding.py
index a7203befcc402..cf2442a569388 100644
--- a/tests/entrypoints/openai/test_embedding.py
+++ b/tests/entrypoints/openai/test_embedding.py
@@ -8,6 +8,8 @@ import openai
 import pytest
 import pytest_asyncio
 import requests
+import torch
+import torch.nn.functional as F
 
 from vllm.entrypoints.openai.protocol import EmbeddingResponse
 from vllm.transformers_utils.tokenizer import get_tokenizer
@@ -369,3 +371,35 @@ async def test_invocations_conversation(server: RemoteOpenAIServer):
                                embeddings_1_lst=[invocation_data["embedding"]],
                                name_0="chat",
                                name_1="invocation")
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_normalize(server: RemoteOpenAIServer, model_name: str):
+    """Check that the per-request `normalize` flag toggles L2 normalization."""
+    input_text = ["The chef prepared a delicious meal."]
+
+    async def get_outputs(normalize):
+        request_args = {
+            "model": model_name,
+            "input": input_text,
+            "encoding_format": "float",
+            "normalize": normalize
+        }
+
+        response = requests.post(server.url_for("v1/embeddings"),
+                                 json=request_args)
+        outputs = response.json()
+        return torch.tensor([x['embedding'] for x in outputs["data"]])
+
+    default = await get_outputs(normalize=None)
+    w_normal = await get_outputs(normalize=True)
+    wo_normal = await get_outputs(normalize=False)
+
+    assert torch.allclose(default, w_normal,
+                          atol=1e-2), "Default should use normal."
+    assert not torch.allclose(w_normal, wo_normal,
+                              atol=1e-2), "wo_normal should not use normal."
+    assert torch.allclose(
+        w_normal, F.normalize(wo_normal, p=2, dim=-1),
+        atol=1e-2), "w_normal should be close to normal(wo_normal)."
diff --git a/tests/entrypoints/openai/test_rerank.py b/tests/entrypoints/openai/test_rerank.py
index 4da97fe13691b..f121693e329fa 100644
--- a/tests/entrypoints/openai/test_rerank.py
+++ b/tests/entrypoints/openai/test_rerank.py
@@ -3,6 +3,8 @@
 
 import pytest
 import requests
+import torch
+import torch.nn.functional as F
 
 from vllm.entrypoints.openai.protocol import RerankResponse
 
@@ -125,3 +127,39 @@ def test_invocations(server: RemoteOpenAIServer):
         assert rerank_result.keys() == invocations_result.keys()
         assert rerank_result["relevance_score"] == pytest.approx(
             invocations_result["relevance_score"], rel=0.01)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_activation(server: RemoteOpenAIServer, model_name: str):
+
+    async def get_outputs(activation):
+        query = "What is the capital of France?"
+        documents = [
+            "The capital of Brazil is Brasilia.",
+            "The capital of France is Paris."
+        ]
+
+        response = requests.post(server.url_for("rerank"),
+                                 json={
+                                     "model": model_name,
+                                     "query": query,
+                                     "documents": documents,
+                                     "activation": activation
+                                 })
+        outputs = response.json()
+
+        return torch.tensor([x['relevance_score'] for x in outputs["results"]])
+
+    default = await get_outputs(activation=None)
+    w_activation = await get_outputs(activation=True)
+    wo_activation = await get_outputs(activation=False)
+
+    assert torch.allclose(default, w_activation,
+                          atol=1e-2), "Default should use activation."
+    assert not torch.allclose(
+        w_activation, wo_activation,
+        atol=1e-2), "wo_activation should not use activation."
+    assert torch.allclose(
+        F.sigmoid(wo_activation), w_activation, atol=1e-2
+    ), "w_activation should be close to activation(wo_activation)."
diff --git a/tests/entrypoints/openai/test_score.py b/tests/entrypoints/openai/test_score.py
index 187542b7bafc9..1a5df1d2dbd2d 100644
--- a/tests/entrypoints/openai/test_score.py
+++ b/tests/entrypoints/openai/test_score.py
@@ -4,6 +4,7 @@ from typing import Any
 
 import pytest
 import requests
+import torch
 import torch.nn.functional as F
 from torch import tensor
 
@@ -220,3 +221,43 @@ class TestModel:
             assert score_data.keys() == invocation_data.keys()
             assert score_data["score"] == pytest.approx(
                 invocation_data["score"], rel=0.01)
+
+    def test_activation(self, server: RemoteOpenAIServer, model: dict[str,
+                                                                      Any]):
+
+        def get_outputs(activation):
+            text_1 = "What is the capital of France?"
+            text_2 = "The capital of France is Paris."
+            response = requests.post(server.url_for("score"),
+                                     json={
+                                         "model": model["name"],
+                                         "text_1": text_1,
+                                         "text_2": text_2,
+                                         "activation": activation
+                                     })
+            if response.status_code != 200:
+                return response
+
+            outputs = response.json()
+            return torch.tensor([x['score'] for x in outputs["data"]])
+
+        if model["is_cross_encoder"]:
+
+            default = get_outputs(activation=None)
+            w_activation = get_outputs(activation=True)
+            wo_activation = get_outputs(activation=False)
+
+            assert torch.allclose(default, w_activation,
+                                  atol=1e-2), "Default should use activation."
+            assert not torch.allclose(
+                w_activation, wo_activation,
+                atol=1e-2), "wo_activation should not use activation."
+            assert torch.allclose(
+                F.sigmoid(wo_activation), w_activation, atol=1e-2
+            ), "w_activation should be close to activation(wo_activation)."
+        else:
+            get_outputs(activation=None)
+
+            # The activation parameter only works for the is_cross_encoder model
+            response = get_outputs(activation=True)
+            assert response.status_code == 400
diff --git a/tests/models/language/pooling/test_override_pooler_config.py b/tests/models/language/pooling/test_override_pooler_config.py
new file mode 100644
index 0000000000000..2b1c74652e76f
--- /dev/null
+++ b/tests/models/language/pooling/test_override_pooler_config.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+import torch
+import torch.nn.functional as F
+
+from tests.models.utils import softmax
+from vllm.config import PoolerConfig
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "jason9693/Qwen2.5-1.5B-apeach",
+        "papluca/xlm-roberta-base-language-detection"
+    ],
+)
+@pytest.mark.parametrize("dtype", ["half"])
+def test_classify_models_using_activation(
+    hf_runner,
+    vllm_runner,
+    example_prompts,
+    model: str,
+    dtype: str,
+) -> None:
+
+    with vllm_runner(model,
+                     max_model_len=512,
+                     dtype=dtype,
+                     override_pooler_config=PoolerConfig(
+                         activation=False)) as vllm_model:
+        wo_activation_out = vllm_model.classify(example_prompts)
+
+    with vllm_runner(model,
+                     max_model_len=512,
+                     dtype=dtype,
+                     override_pooler_config=PoolerConfig(
+                         activation=True)) as vllm_model:
+        w_activation_out = vllm_model.classify(example_prompts)
+
+    for wo_activation, w_activation in zip(wo_activation_out,
+                                           w_activation_out):
+        wo_activation = torch.tensor(wo_activation)
+        w_activation = torch.tensor(w_activation)
+
+        assert not torch.allclose(
+            wo_activation, w_activation,
+            atol=1e-2), "override_pooler_config is not working"
+        assert torch.allclose(softmax(wo_activation), w_activation,
+                              1e-3 if dtype == "float" else 1e-2)
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "intfloat/multilingual-e5-small",
+    ],
+)
+@pytest.mark.parametrize("dtype", ["half"])
+def test_embed_models_using_normalize(
+    hf_runner,
+    vllm_runner,
+    example_prompts,
+    model: str,
+    dtype: str,
+) -> None:
+
+    with vllm_runner(model,
+                     max_model_len=512,
+                     dtype=dtype,
+                     override_pooler_config=PoolerConfig(
+                         normalize=False)) as vllm_model:
+        wo_normalize = torch.tensor(vllm_model.embed(example_prompts))
+
+    with vllm_runner(
+            model,
+            max_model_len=512,
+            dtype=dtype,
+            override_pooler_config=PoolerConfig(normalize=True)) as vllm_model:
+        w_normalize = torch.tensor(vllm_model.embed(example_prompts))
+
+    assert not torch.allclose(
+        wo_normalize, w_normalize,
+        atol=1e-2), "override_pooler_config normalize is not working"
+    assert torch.allclose(
+        F.normalize(wo_normalize, p=2, dim=-1), w_normalize,
+        atol=1e-2), "w_normal should be close to normal(wo_normal)."
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "internlm/internlm2-1_8b-reward",
+    ],
+)
+@pytest.mark.parametrize("dtype", ["half"])
+def test_reward_models_using_softmax(
+    hf_runner,
+    vllm_runner,
+    example_prompts,
+    model: str,
+    dtype: str,
+) -> None:
+
+    with vllm_runner(
+            model,
+            max_model_len=1024,
+            dtype=dtype,
+            override_pooler_config=PoolerConfig(softmax=False)) as vllm_model:
+        wo_softmax = vllm_model.encode(example_prompts)
+
+    with vllm_runner(
+            model,
+            max_model_len=1024,
+            dtype=dtype,
+            override_pooler_config=PoolerConfig(softmax=True)) as vllm_model:
+        w_softmax = vllm_model.encode(example_prompts)
+
+    for wo, w in zip(wo_softmax, w_softmax):
+        wo = torch.tensor(wo)
+        w = torch.tensor(w)
+
+        assert not torch.allclose(
+            wo, w, atol=1e-2), "override_pooler_config softmax is not working"
+        assert torch.allclose(
+            softmax(wo), w,
+            atol=1e-2), "w_softmax should be close to softmax(wo_softmax)."
diff --git a/tests/models/language/pooling/test_reward.py b/tests/models/language/pooling/test_reward.py
index a5f7dca76d822..7add1d975c634 100644
--- a/tests/models/language/pooling/test_reward.py
+++ b/tests/models/language/pooling/test_reward.py
@@ -103,7 +103,7 @@ def test_prm_models(
 
     # check logits difference
     for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
-        hf_output = torch.tensor(hf_output)
-        vllm_output = torch.tensor(vllm_output)
+        hf_output = torch.tensor(hf_output).float()
+        vllm_output = torch.tensor(vllm_output).float()
 
         assert torch.allclose(hf_output, vllm_output, 1.5e-2)
diff --git a/tests/models/utils.py b/tests/models/utils.py
index 3cd0721be1b65..bda7ea3e3ad51 100644
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -330,6 +330,13 @@ def matryoshka_fy(tensor: torch.Tensor, dimensions: int):
     return tensor
 
 
+def softmax(data):
+    if data.shape[-1] == 1:
+        return F.sigmoid(data)
+    else:
+        return F.softmax(data, dim=-1)
+
+
 class EmbedModelInfo(NamedTuple):
     name: str
     is_matryoshka: bool = False
diff --git a/tests/test_pooling_params.py b/tests/test_pooling_params.py
new file mode 100644
index 0000000000000..52c03015483c9
--- /dev/null
+++ b/tests/test_pooling_params.py
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+
+from tests.models.utils import EmbedModelInfo
+from vllm import PoolingParams
+from vllm.config import ModelConfig
+
+EMBEDDING_MODELS = [
+    EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False),
+    EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v1.5",
+                   is_matryoshka=True,
+                   matryoshka_dimensions=[256]),
+]
+
+
+def test_task():
+    pooling_params = PoolingParams()
+    pooling_params.verify(task="score")
+
+    pooling_params = PoolingParams(task="score")
+    pooling_params.verify(task="score")
+
+    with pytest.raises(ValueError):
+        pooling_params.verify(task="encode")
+
+
+def test_embed():
+    task = "embed"
+    pooling_params = PoolingParams(normalize=None)
+    pooling_params.verify(task=task)
+
+    pooling_params = PoolingParams(normalize=True)
+    pooling_params.verify(task=task)
+
+    pooling_params = PoolingParams(normalize=False)
+    pooling_params.verify(task=task)
+
+    invalid_parameters = ["activation", "softmax"]
+    for p in invalid_parameters:
+        with pytest.raises(ValueError):
+            pooling_params = PoolingParams(**{p: True})
+            pooling_params.verify(task=task)
+
+
+@pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
+def test_embed_dimensions(model_info: EmbedModelInfo):
+    task = "embed"
+    model_config = ModelConfig(
+        model_info.name,
+        task="auto",
+        tokenizer=model_info.name,
+        tokenizer_mode="auto",
+        trust_remote_code=False,
+        seed=0,
+        dtype="float16",
+    )
+
+    pooling_params = PoolingParams(dimensions=None)
+    pooling_params.verify(task=task, model_config=model_config)
+
+    with pytest.raises(ValueError):
+        pooling_params = PoolingParams(dimensions=1)
+        pooling_params.verify(task=task, model_config=model_config)
+
+    if model_info.is_matryoshka:
+        assert model_info.matryoshka_dimensions is not None
+        pooling_params = PoolingParams(
+            dimensions=model_info.matryoshka_dimensions[0])
+        pooling_params.verify(task=task, model_config=model_config)
+
+
+@pytest.mark.parametrize("task", ["score", "classify"])
+def test_classify(task):
+    pooling_params = PoolingParams(activation=None)
+    pooling_params.verify(task=task)
+
+    pooling_params = PoolingParams(activation=True)
+    pooling_params.verify(task=task)
+
+    pooling_params = PoolingParams(activation=False)
+    pooling_params.verify(task=task)
+
+    invalid_parameters = ["dimensions", "normalize", "softmax"]
+    for p in invalid_parameters:
+        with pytest.raises(ValueError):
+            pooling_params = PoolingParams(**{p: True})
+            pooling_params.verify(task=task)
+
+
+def test_encode():
+    task = "encode"
+    pooling_params = PoolingParams(softmax=None)
+    pooling_params.verify(task=task)
+
+    pooling_params = PoolingParams(softmax=True)
+    pooling_params.verify(task=task)
+
+    pooling_params = PoolingParams(softmax=False)
+    pooling_params.verify(task=task)
+
+    invalid_parameters = ["dimensions", "normalize", "activation"]
+    for p in invalid_parameters:
+        with pytest.raises(ValueError):
+            pooling_params = PoolingParams(**{p: True})
+            pooling_params.verify(task=task)
diff --git a/vllm/config.py b/vllm/config.py
index 34952279c9d19..899862bf541e7 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -913,15 +913,6 @@ class ModelConfig:
                     if getattr(pooler_config, k) is None:
                         setattr(pooler_config, k, v)
 
-            if self.is_matryoshka:
-                if pooler_config.normalize is None:
-                    pooler_config.normalize = True
-                elif not pooler_config.normalize:
-                    raise ValueError(
-                        "`normalize` must be enabled (set to True) "
-                        "for models that are compatible with "
-                        "Matryoshka Representation.")
-
             return pooler_config
 
         return None
@@ -3438,25 +3429,34 @@ class PoolerConfig:
     [`vllm.model_executor.layers.pooler.PoolingType`][].
     """
 
+    ## for embedding models
     normalize: Optional[bool] = None
     """
-    Whether to normalize the pooled outputs. Usually, this should be set to
-    ``True`` for embedding outputs.
+    Whether to normalize the embedding outputs.
+    """
+    dimensions: Optional[int] = None
+    """
+    Reduce the dimensions of the embeddings if the model
+    supports Matryoshka representation.
     """
 
+    ## for classification models
+    activation: Optional[bool] = None
+    """
+    Whether to apply the activation function to the classification outputs.
+    """
+
+    ## for reward models
     softmax: Optional[bool] = None
     """
-    Whether to apply softmax to the pooled outputs. Usually, this should be set
-    to ``True`` for classification outputs.
+    Whether to apply softmax to the reward outputs.
     """
-
     step_tag_id: Optional[int] = None
     """
     If set, only the score corresponding to the ``step_tag_id`` in the
     generated sentence should be returned. Otherwise, the scores for all tokens
     are returned.
     """
-
     returned_token_ids: Optional[list[int]] = None
     """
     A list of indices for the vocabulary dimensions to be extracted,
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 842a22ccebaa4..ca24b0c32b73b 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1189,6 +1189,8 @@ class LLM:
         /,
         *,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
+        pooling_params: Optional[Union[PoolingParams,
+                                       Sequence[PoolingParams]]] = None,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
     ) -> list[ClassificationRequestOutput]:
         """
@@ -1207,7 +1209,8 @@ class LLM:
                 it is used to create the progress bar.
                 If `False`, no progress bar is created.
             lora_request: LoRA request to use for generation, if any.
-
+            pooling_params: The pooling parameters to apply. If None, the
+                default pooling parameters are used.
         Returns:
             A list of `ClassificationRequestOutput` objects containing the
             embedding vectors in the same order as the input prompts.
@@ -1220,6 +1223,7 @@ class LLM:
         items = self.encode(
             prompts,
             use_tqdm=use_tqdm,
+            pooling_params=pooling_params,
             lora_request=lora_request,
             pooling_task="classify",
         )
@@ -1272,6 +1276,7 @@ class LLM:
         text_2: list[Union[str, TextPrompt, TokensPrompt]],
         truncate_prompt_tokens: Optional[int] = None,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
+        pooling_params: Optional[PoolingParams] = None,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
     ) -> list[ScoringRequestOutput]:
 
@@ -1280,6 +1285,7 @@ class LLM:
             truncate_prompt_tokens=truncate_prompt_tokens,
             use_tqdm=use_tqdm,
             lora_request=lora_request,
+            pooling_params=pooling_params,
             pooling_task="embed",
         )
 
@@ -1306,6 +1312,7 @@ class LLM:
         data_2: Union[list[str], list[ScoreContentPartParam]],
         truncate_prompt_tokens: Optional[int] = None,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
+        pooling_params: Optional[PoolingParams] = None,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
     ) -> list[ScoringRequestOutput]:
         model_config = self.llm_engine.model_config
@@ -1317,7 +1324,12 @@ class LLM:
         if len(data_1) == 1:
             data_1 = data_1 * len(data_2)
 
-        pooling_params = PoolingParams(task="score")
+        if pooling_params is None:
+            pooling_params = PoolingParams(task="score")
+
+        model_config = self.llm_engine.model_config
+        pooling_params.verify("score", model_config)
+
         tokenization_kwargs: dict[str, Any] = {}
 
         _validate_truncation_size(model_config.max_model_len,
@@ -1379,6 +1391,7 @@ class LLM:
         *,
         truncate_prompt_tokens: Optional[int] = None,
         use_tqdm: Union[bool, Callable[..., tqdm]] = True,
+        pooling_params: Optional[PoolingParams] = None,
         lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
     ) -> list[ScoringRequestOutput]:
         """Generate similarity scores for all pairs `<text,text_pair>` or
@@ -1410,7 +1423,8 @@ class LLM:
                 it is used to create the progress bar.
                 If `False`, no progress bar is created.
             lora_request: LoRA request to use for generation, if any.
-
+            pooling_params: The pooling parameters to apply. If None, the
+                default pooling parameters are used.
         Returns:
             A list of `ScoringRequestOutput` objects containing the
             generated scores in the same order as the input prompts.
@@ -1494,6 +1508,7 @@ class LLM:
                 data_2,  # type: ignore[arg-type]
                 truncate_prompt_tokens,
                 use_tqdm,
+                pooling_params,
                 lora_request)
         else:
             return self._embedding_score(
@@ -1502,6 +1517,7 @@ class LLM:
                 data_2,  # type: ignore[arg-type]
                 truncate_prompt_tokens,
                 use_tqdm,
+                pooling_params,
                 lora_request)
 
     def start_profile(self) -> None:
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index d77aee345843c..64f2beb14021a 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -1274,11 +1274,13 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
             "not set it, a random_uuid will be generated. This id is used "
             "through out the inference process and return in response."),
     )
+    normalize: Optional[bool] = None
 
     # --8<-- [end:embedding-extra-params]
 
     def to_pooling_params(self):
-        return PoolingParams(dimensions=self.dimensions)
+        return PoolingParams(dimensions=self.dimensions,
+                             normalize=self.normalize)
 
 
 class EmbeddingChatRequest(OpenAIBaseModel):
@@ -1332,6 +1334,7 @@ class EmbeddingChatRequest(OpenAIBaseModel):
             "not set it, a random_uuid will be generated. This id is used "
             "through out the inference process and return in response."),
     )
+    normalize: Optional[bool] = None
     # --8<-- [end:chat-embedding-extra-params]
 
     @model_validator(mode="before")
@@ -1344,7 +1347,8 @@ class EmbeddingChatRequest(OpenAIBaseModel):
         return data
 
     def to_pooling_params(self):
-        return PoolingParams(dimensions=self.dimensions)
+        return PoolingParams(dimensions=self.dimensions,
+                             normalize=self.normalize)
 
 
 EmbeddingRequest = Union[EmbeddingCompletionRequest, EmbeddingChatRequest]
@@ -1375,10 +1379,12 @@ class ScoreRequest(OpenAIBaseModel):
             "if the served model does not use priority scheduling."),
     )
 
+    activation: Optional[bool] = None
+
     # --8<-- [end:score-extra-params]
 
     def to_pooling_params(self):
-        return PoolingParams()
+        return PoolingParams(activation=self.activation)
 
 
 class RerankRequest(OpenAIBaseModel):
@@ -1403,10 +1409,12 @@ class RerankRequest(OpenAIBaseModel):
             "if the served model does not use priority scheduling."),
     )
 
+    activation: Optional[bool] = None
+
     # --8<-- [end:rerank-extra-params]
 
     def to_pooling_params(self):
-        return PoolingParams()
+        return PoolingParams(activation=self.activation)
 
 
 class RerankDocument(BaseModel):
@@ -1553,10 +1561,12 @@ class ClassificationRequest(OpenAIBaseModel):
             "if the served model does not use priority scheduling."),
     )
 
+    activation: Optional[bool] = None
+
     # --8<-- [end:classification-extra-params]
 
     def to_pooling_params(self):
-        return PoolingParams()
+        return PoolingParams(activation=self.activation)
 
 
 class ClassificationData(OpenAIBaseModel):
diff --git a/vllm/model_executor/layers/pooler.py b/vllm/model_executor/layers/pooler.py
index 5bfd4aaccc17c..0f2e58eb9b5d9 100644
--- a/vllm/model_executor/layers/pooler.py
+++ b/vllm/model_executor/layers/pooler.py
@@ -41,35 +41,18 @@ class PoolingType(IntEnum):
 @dataclass(frozen=True)
 class ResolvedPoolingConfig:
     pooling_type: PoolingType
-
-    normalize: bool
-    softmax: bool
-    step_tag_id: Optional[int]
-    returned_token_ids: Optional[list[int]]
+    task: PoolingTask
 
     @classmethod
     def from_config_with_defaults(
         cls,
+        task: PoolingTask,
         pooler_config: PoolerConfig,
         pooling_type: PoolingType,
-        normalize: bool,
-        softmax: bool,
-        step_tag_id: Optional[int] = None,
-        returned_token_ids: Optional[list[int]] = None,
     ) -> "ResolvedPoolingConfig":
-        return cls(
-            pooling_type=PoolingType[pooler_config.pooling_type]
-            if pooler_config.pooling_type is not None else pooling_type,
-            normalize=pooler_config.normalize
-            if pooler_config.normalize is not None else normalize,
-            softmax=pooler_config.softmax
-            if pooler_config.softmax is not None else softmax,
-            step_tag_id=pooler_config.step_tag_id
-            if pooler_config.step_tag_id is not None else step_tag_id,
-            returned_token_ids=pooler_config.returned_token_ids
-            if pooler_config.returned_token_ids is not None else
-            returned_token_ids,
-        )
+        return cls(task=task,
+                   pooling_type=PoolingType[pooler_config.pooling_type]
+                   if pooler_config.pooling_type is not None else pooling_type)
 
 
 @dataclass(frozen=True)
@@ -89,22 +72,15 @@ class Pooler(nn.Module, ABC):
         pooler_config: PoolerConfig,
         *,
         default_pooling_type: PoolingType = PoolingType.ALL,
-        default_normalize: bool = False,
-        default_softmax: bool = False,
-        default_step_tag_id: Optional[int] = None,
-        default_returned_token_ids: Optional[list[int]] = None,
     ):
         resolved_config = ResolvedPoolingConfig.from_config_with_defaults(
+            task="encode",
             pooler_config=pooler_config,
             pooling_type=default_pooling_type,
-            normalize=default_normalize,
-            softmax=default_softmax,
-            step_tag_id=default_step_tag_id,
-            returned_token_ids=default_returned_token_ids,
         )
 
         if resolved_config.pooling_type == PoolingType.STEP:
-            return StepPooler.from_config(resolved_config)
+            return StepPooler()
 
         return SimplePooler.from_config(resolved_config)
 
@@ -113,14 +89,11 @@ class Pooler(nn.Module, ABC):
         pooler_config: PoolerConfig,
         *,
         default_pooling_type: PoolingType = PoolingType.LAST,
-        default_normalize: bool = True,
-        default_softmax: bool = False,
     ):
         resolved_config = ResolvedPoolingConfig.from_config_with_defaults(
+            task="embed",
             pooler_config=pooler_config,
             pooling_type=default_pooling_type,
-            normalize=default_normalize,
-            softmax=default_softmax,
         )
 
         return SimplePooler.from_config(resolved_config)
@@ -131,23 +104,18 @@ class Pooler(nn.Module, ABC):
         classifier: Optional[ClassifierFn],
         *,
         default_pooling_type: PoolingType = PoolingType.LAST,
-        default_normalize: bool = False,
-        default_softmax: bool = True,
     ):
         resolved_config = ResolvedPoolingConfig.from_config_with_defaults(
+            task="classify",
             pooler_config=pooler_config,
             pooling_type=default_pooling_type,
-            normalize=default_normalize,
-            softmax=default_softmax,
         )
-        base_pooler = SimplePooler.from_config(resolved_config)
-        if classifier is None:
-            return base_pooler
+
+        pooling = PoolingMethod.from_pooling_type(resolved_config.pooling_type)
 
         return ClassifierPooler(
-            pooling=base_pooler.pooling,
+            pooling=pooling,
             classifier=classifier,
-            act_fn=base_pooler.head.activation,
         )
 
     @abstractmethod
@@ -198,11 +166,17 @@ def get_prompt_token_ids(
     ]
 
 
-def get_tasks(pooling_metadata: PoolingMetadata) -> list[PoolingTask]:
+def get_pooling_params(
+        pooling_metadata: PoolingMetadata) -> list[PoolingParams]:
     if isinstance(pooling_metadata, V0PoolingMetadata):
         pooling_params = [p for _, p in pooling_metadata.seq_groups]
     else:
         pooling_params = pooling_metadata.pooling_params
+    return pooling_params
+
+
+def get_tasks(pooling_metadata: PoolingMetadata) -> list[PoolingTask]:
+    pooling_params = get_pooling_params(pooling_metadata)
 
     tasks: list[PoolingTask] = [
         task for pooling_param in pooling_params
@@ -484,49 +458,30 @@ class LambdaPoolerActivation(PoolerActivation):
 
 class PoolerHead(nn.Module):
 
-    @classmethod
-    def from_config(cls, pooler_config: ResolvedPoolingConfig) -> "PoolerHead":
-        if pooler_config.normalize and pooler_config.softmax:
-            raise ValueError("`normalize=True` and `softmax=True` should not "
-                             "be set together")
-
-        activation: PoolerActivation
-        if pooler_config.normalize:
-            activation = PoolerNormalize()
-        elif pooler_config.softmax:
-            activation = PoolerClassify()
-        else:
-            activation = PoolerIdentity()
-
-        return cls(activation)
-
     def __init__(self, activation: PoolerActivation) -> None:
         super().__init__()
-
         self.activation = activation
 
     def forward(self, pooled_data: Union[list[torch.Tensor], torch.Tensor],
                 pooling_metadata: PoolingMetadata):
 
-        # Using float32 in PoolerHead
-        if isinstance(pooled_data, list):
-            for i in range(len(pooled_data)):
-                pooled_data[i] = pooled_data[i].to(torch.float32)
-        else:
-            pooled_data = pooled_data.to(torch.float32)
+        return self.activation(pooled_data)
+
+
+class EmbeddingPoolerHead(PoolerHead):
+
+    def __init__(self) -> None:
+        super().__init__(activation=PoolerNormalize())
+
+    def forward(self, pooled_data: Union[list[torch.Tensor], torch.Tensor],
+                pooling_metadata: PoolingMetadata):
+
+        pooling_params = get_pooling_params(pooling_metadata)
 
         # for matryoshka representation
-        if isinstance(pooling_metadata, V0PoolingMetadata):
-            dimensions_list = [
-                pooling_param.dimensions
-                for _, pooling_param in pooling_metadata.seq_groups
-            ]
-        else:
-            assert isinstance(pooled_data, list)
-            dimensions_list = [
-                pooling_param.dimensions
-                for pooling_param in pooling_metadata.pooling_params
-            ]
+        dimensions_list = [
+            pooling_param.dimensions for pooling_param in pooling_params
+        ]
         if any(d is not None for d in dimensions_list):
             # change the output dimension
             assert len(pooled_data) == len(dimensions_list)
@@ -541,7 +496,41 @@ class PoolerHead(nn.Module):
                     for vecs, d in zip(pooled_data, dimensions_list)
                 ]
 
-        return self.activation(pooled_data)
+        # for normalize
+        flags = [p.normalize for p in pooling_params]
+        if len(set(flags)) == 1:
+            if flags[0]:
+                pooled_data = self.activation(pooled_data)
+        else:
+            pooled_data = [
+                self.activation(vecs) if f else vecs
+                for vecs, f in zip(pooled_data, flags)
+            ]
+
+        return pooled_data
+
+
+class RewardPoolerHead(PoolerHead):
+
+    def __init__(self) -> None:
+        super().__init__(activation=PoolerClassify())
+
+    def forward(self, pooled_data: Union[list[torch.Tensor], torch.Tensor],
+                pooling_metadata: PoolingMetadata):
+        pooling_params = get_pooling_params(pooling_metadata)
+
+        # for softmax
+        flags = [p.softmax for p in pooling_params]
+        if len(set(flags)) == 1:
+            if flags[0]:
+                pooled_data = self.activation(pooled_data)
+        else:
+            pooled_data = [
+                self.activation(vecs) if f else vecs
+                for vecs, f in zip(pooled_data, flags)
+            ]
+
+        return pooled_data
 
 
 class SimplePooler(Pooler):
@@ -559,8 +548,12 @@ class SimplePooler(Pooler):
         pooler_config: ResolvedPoolingConfig,
     ) -> "SimplePooler":
         pooling = PoolingMethod.from_pooling_type(pooler_config.pooling_type)
-        head = PoolerHead.from_config(pooler_config)
-
+        if pooler_config.task == "embed":
+            head = EmbeddingPoolerHead()
+        elif pooler_config.task == "encode":
+            head = RewardPoolerHead()
+        else:
+            raise NotImplementedError(f"Unknown task: {pooler_config.task}")
         return cls(pooling, head)
 
     def __init__(self, pooling: PoolingMethod, head: PoolerHead) -> None:
@@ -587,29 +580,11 @@ class SimplePooler(Pooler):
 
 class StepPooler(Pooler):
 
-    @classmethod
-    def from_config(cls, pooler_config: ResolvedPoolingConfig) -> "StepPooler":
-        assert pooler_config.pooling_type == PoolingType.STEP
-
-        return cls(
-            PoolerHead.from_config(pooler_config),
-            step_tag_id=pooler_config.step_tag_id,
-            returned_token_ids=pooler_config.returned_token_ids,
-        )
-
-    def __init__(
-        self,
-        head: PoolerHead,
-        *,
-        step_tag_id: Optional[int] = None,
-        returned_token_ids: Optional[list[int]] = None,
-    ) -> None:
+    def __init__(self, ) -> None:
         super().__init__()
 
         self.pooling = AllPool()
-        self.head = head
-        self.step_tag_id = step_tag_id
-        self.returned_token_ids = returned_token_ids
+        self.head = RewardPoolerHead()
 
     def extract_states(
         self,
@@ -620,10 +595,15 @@ class StepPooler(Pooler):
         prompt_token_ids = get_prompt_token_ids(pooling_metadata)
 
         pooled_data = list[torch.Tensor]()
-        returned_token_ids = self.returned_token_ids
-        step_tag_id = self.step_tag_id
 
-        for data, token_id in zip(pooled_data_lst, prompt_token_ids):
+        pooling_params = get_pooling_params(pooling_metadata)
+
+        for data, token_id, pooling_param in zip(pooled_data_lst,
+                                                 prompt_token_ids,
+                                                 pooling_params):
+            step_tag_id = pooling_param.step_tag_id
+            returned_token_ids = pooling_param.returned_token_ids
+
             if returned_token_ids is not None and len(returned_token_ids) > 0:
                 data = data[:, returned_token_ids]
 
@@ -669,14 +649,14 @@ class ClassifierPooler(Pooler):
     def __init__(
         self,
         pooling: PoolingFn,
-        classifier: ClassifierFn,
-        act_fn: PoolerActivation,
+        classifier: Optional[ClassifierFn],
+        act_fn: Optional[PoolerActivation] = None,
     ) -> None:
         super().__init__()
 
         self.pooling = pooling
         self.classifier = classifier
-        self.act_fn = act_fn
+        self.act_fn = act_fn or PoolerClassify()
 
     def get_supported_tasks(self) -> Set[PoolingTask]:
         return {"classify", "score"}
@@ -688,15 +668,25 @@ class ClassifierPooler(Pooler):
     ) -> PoolerOutput:
         pooled_data = self.pooling(hidden_states, pooling_metadata)
 
-        # apply classifier once on the full batch if possible
-        if isinstance(pooled_data, torch.Tensor):
-            pooled_output = self.classifier(pooled_data)
-        elif len({data.shape for data in pooled_data}) <= 1:
-            pooled_output = self.classifier(torch.stack(pooled_data))
-        else:
-            pooled_output = [self.classifier(data) for data in pooled_data]
+        if self.classifier is not None:
+            # apply classifier once on the full batch if possible
+            if isinstance(pooled_data, torch.Tensor):
+                pooled_data = self.classifier(pooled_data)
+            elif len({data.shape for data in pooled_data}) <= 1:
+                pooled_data = self.classifier(torch.stack(pooled_data))
+            else:
+                pooled_data = [self.classifier(data) for data in pooled_data]
 
-        scores = self.act_fn(pooled_output)
+        pooling_params = get_pooling_params(pooling_metadata)
+        flags = [p.activation for p in pooling_params]
+
+        if len(set(flags)) == 1:
+            scores = self.act_fn(pooled_data) if flags[0] else pooled_data
+        else:
+            scores = [
+                self.act_fn(vecs) if f else vecs
+                for vecs, f in zip(pooled_data, flags)
+            ]
 
         return build_output(scores)
 
diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index 9030ff307bee3..6f09be7a59410 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -44,6 +44,15 @@ class GteNewModelConfig(VerifyAndUpdateConfig):
         }
 
 
+class JambaForSequenceClassificationConfig(VerifyAndUpdateConfig):
+
+    @staticmethod
+    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
+        pooler_config = vllm_config.model_config.pooler_config
+        if pooler_config.activation is None:
+            pooler_config.activation = False
+
+
 class JinaRobertaModelConfig(VerifyAndUpdateConfig):
 
     @staticmethod
@@ -155,6 +164,26 @@ class NomicBertModelConfig(VerifyAndUpdateConfig):
             vllm_config.recalculate_max_model_len(max_model_len)
 
 
+class Qwen2ForProcessRewardModelConfig(VerifyAndUpdateConfig):
+
+    @staticmethod
+    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
+        pooler_config = vllm_config.model_config.pooler_config
+
+        if pooler_config.step_tag_id is None:
+            pooler_config.step_tag_id = 151651
+
+
+class Qwen2ForRewardModelConfig(VerifyAndUpdateConfig):
+
+    @staticmethod
+    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
+        pooler_config = vllm_config.model_config.pooler_config
+
+        if pooler_config.softmax is None:
+            pooler_config.softmax = False
+
+
 class Qwen3ForSequenceClassificationConfig(VerifyAndUpdateConfig):
 
     @staticmethod
@@ -309,8 +338,11 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
     "GteModel": SnowflakeGteNewModelConfig,
     "GteNewModel": GteNewModelConfig,
     "NomicBertModel": NomicBertModelConfig,
+    "Qwen2ForProcessRewardModel": Qwen2ForProcessRewardModelConfig,
+    "Qwen2ForRewardModel": Qwen2ForRewardModelConfig,
     "Qwen3ForSequenceClassification": Qwen3ForSequenceClassificationConfig,
     "XLMRobertaModel": JinaRobertaModelConfig,
     "JinaVLForRanking": JinaVLForSequenceClassificationConfig,
+    "JambaForSequenceClassification": JambaForSequenceClassificationConfig,
     "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
 }
diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py
index 263f4c8379cf2..ab21b7ce2c5f5 100644
--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -593,7 +593,5 @@ class JambaForSequenceClassification(JambaForCausalLM):
                 pooler_config,
                 classifier=self.score,
                 default_pooling_type=PoolingType.LAST,
-                default_normalize=False,
-                default_softmax=False,
             ),
         })
diff --git a/vllm/model_executor/models/jina_vl.py b/vllm/model_executor/models/jina_vl.py
index 0c4284f7daaac..8c64f636c6a0f 100644
--- a/vllm/model_executor/models/jina_vl.py
+++ b/vllm/model_executor/models/jina_vl.py
@@ -90,15 +90,12 @@ class JinaVLForSequenceClassification(Qwen2VLForConditionalGeneration,
                          prefix=maybe_prefix(prefix, "qwen2_vl"))
         config = vllm_config.model_config.hf_config
         pooler_config = vllm_config.model_config.pooler_config
+        assert pooler_config is not None
 
         # logit bias for sigmoid normalization
         self.LOGIT_BIAS = 2.65
 
         self.score = JinaVLScorer(config)
-
-        pooler_config = vllm_config.model_config.pooler_config
-        assert pooler_config is not None
-
         self.pooler = DispatchPooler({
             "encode":
             Pooler.for_encode(pooler_config),
diff --git a/vllm/model_executor/models/qwen2_rm.py b/vllm/model_executor/models/qwen2_rm.py
index f12e9a041a944..9b6b70c75c341 100644
--- a/vllm/model_executor/models/qwen2_rm.py
+++ b/vllm/model_executor/models/qwen2_rm.py
@@ -117,8 +117,5 @@ class Qwen2ForProcessRewardModel(Qwen2RewardBaseModel):
             Pooler.for_encode(
                 pooler_config,
                 default_pooling_type=PoolingType.STEP,
-                default_normalize=False,
-                default_softmax=True,
-                default_step_tag_id=151651,
             )
         })
diff --git a/vllm/pooling_params.py b/vllm/pooling_params.py
index 23eb775f2dc69..7077f68353fc5 100644
--- a/vllm/pooling_params.py
+++ b/vllm/pooling_params.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from copy import deepcopy
 from typing import TYPE_CHECKING, Optional
 
 import msgspec
@@ -19,13 +20,25 @@ class PoolingParams(
     """API parameters for pooling models.
 
     Attributes:
+        normalize: Whether to normalize the embedding outputs.
         dimensions: Reduce the dimensions of embeddings
                     if model support matryoshka representation.
+        activation: Whether to apply the activation function to
+                    the classification outputs.
+        softmax: Whether to apply softmax to the reward outputs.
     """
 
+    ## for embedding models
     dimensions: Optional[int] = None
+    normalize: Optional[bool] = None
 
-    output_kind: RequestOutputKind = RequestOutputKind.FINAL_ONLY
+    ## for classification models
+    activation: Optional[bool] = None
+
+    ## for reward models
+    softmax: Optional[bool] = None
+    step_tag_id: Optional[int] = None
+    returned_token_ids: Optional[list[int]] = None
 
     task: Optional[PoolingTask] = None
     """Internal use only."""
@@ -33,15 +46,32 @@ class PoolingParams(
     requires_token_ids: bool = False
     """Internal use only."""
 
+    output_kind: RequestOutputKind = RequestOutputKind.FINAL_ONLY
+
+    @property
+    def all_parameters(self) -> list[str]:
+        return [
+            "dimensions", "normalize", "activation", "softmax", "step_tag_id",
+            "returned_token_ids"
+        ]
+
+    @property
+    def valid_parameters(self):
+        return {
+            "embed": ["dimensions", "normalize"],
+            "classify": ["activation"],
+            "score": ["activation"],
+            "encode": ["softmax", "step_tag_id", "returned_token_ids"],
+        }
+
     def clone(self) -> "PoolingParams":
         """Returns a deep copy of the PoolingParams instance."""
-        return PoolingParams(
-            dimensions=self.dimensions,
-            task=self.task,
-            requires_token_ids=self.requires_token_ids,
-        )
+        return deepcopy(self)
+
+    def verify(self,
+               task: PoolingTask,
+               model_config: Optional["ModelConfig"] = None) -> None:
 
-    def verify(self, task: PoolingTask, model_config: "ModelConfig") -> None:
         if self.task is None:
             self.task = task
         elif self.task != task:
@@ -52,28 +82,91 @@ class PoolingParams(
         # which is not available in model config. So, it's not included
         # in this method
 
-        if self.dimensions is not None:
-            if not model_config.is_matryoshka:
-                raise ValueError(
-                    f'Model "{model_config.served_model_name}" does not '
-                    f'support matryoshka representation, '
-                    f'changing output dimensions will lead to poor results.')
+        self._merge_default_parameters(model_config)
+        self._set_default_parameters(model_config)
+        self._verify_valid_parameters()
 
-            mds = model_config.matryoshka_dimensions
-            if mds is not None:
-                if self.dimensions not in mds:
+    def _merge_default_parameters(self,
+                                  model_config: Optional["ModelConfig"] = None
+                                  ) -> None:
+
+        if model_config is None:
+            return
+
+        pooler_config = model_config.pooler_config
+        if pooler_config is None:
+            return
+
+        assert self.task is not None, "task must be set"
+        valid_parameters = self.valid_parameters[self.task]
+
+        for k in valid_parameters:
+            if getattr(pooler_config, k, None) is None:
+                continue
+
+            if getattr(self, k, None) is None:
+                setattr(self, k, getattr(pooler_config, k))
+
+    def _set_default_parameters(self, model_config: Optional["ModelConfig"]):
+        if self.task == "embed":
+            if self.normalize is None:
+                self.normalize = True
+
+            if self.dimensions is not None and model_config is not None:
+                if not model_config.is_matryoshka:
                     raise ValueError(
-                        f'Model "{model_config.served_model_name}" '
-                        f'only supports {str(mds)} matryoshka dimensions, '
-                        f'use other output dimensions will '
-                        f'lead to poor results.')
-            elif self.dimensions < 1:
-                raise ValueError("Dimensions must be greater than 0")
+                        f'Model "{model_config.served_model_name}" does not '
+                        f'support matryoshka representation, '
+                        f'changing output dimensions will lead to poor results.'
+                    )
+
+                mds = model_config.matryoshka_dimensions
+                if mds is not None:
+                    if self.dimensions not in mds:
+                        raise ValueError(
+                            f'Model "{model_config.served_model_name}" '
+                            f'only supports {str(mds)} matryoshka dimensions, '
+                            f'use other output dimensions will '
+                            f'lead to poor results.')
+                elif self.dimensions < 1:
+                    raise ValueError("Dimensions must be greater than 0")
+
+        elif self.task in ["classify", "score"]:
+            if self.activation is None:
+                self.activation = True
+
+        elif self.task == "encode":
+            if self.softmax is None:
+                self.softmax = True
+        else:
+            raise ValueError(f"Unknown pooling task: {self.task}")
+
+    def _verify_valid_parameters(self):
+        assert self.task is not None, "task must be set"
+        valid_parameters = self.valid_parameters[self.task]
+        invalid_parameters = []
+        for k in self.all_parameters:
+            if k in valid_parameters:
+                continue
+
+            if getattr(self, k, None) is not None:
+                invalid_parameters.append(k)
+
+        if invalid_parameters:
+            raise ValueError(
+                f"Task {self.task} only supports {valid_parameters} "
+                f"parameters, does not support "
+                f"{invalid_parameters} parameters")
 
     def __repr__(self) -> str:
         return (f"PoolingParams("
-                f"dimensions={self.dimensions}, "
                 f"task={self.task}, "
+                f"normalize={self.normalize}, "
+                f"dimensions={self.dimensions}, "
+                f"activation={self.activation}, "
+                f"softmax={self.softmax}, "
+                f"step_tag_id={self.step_tag_id}, "
+                f"returned_token_ids={self.returned_token_ids}, "
                 f"requires_token_ids={self.requires_token_ids})")
 
     def __post_init__(self) -> None:

From d1bf1b97111df876737e3af3d9249c7ccc545f15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= <nlucches@redhat.com>
Date: Tue, 5 Aug 2025 11:33:46 +0200
Subject: [PATCH 216/224] [Docs][TPU] Highlight TPU Software version selection
 (#22242)

Signed-off-by: NickLucche <nlucches@redhat.com>
---
 docs/getting_started/installation/google_tpu.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/getting_started/installation/google_tpu.md b/docs/getting_started/installation/google_tpu.md
index 55d69d11fa401..6f09babb3aba0 100644
--- a/docs/getting_started/installation/google_tpu.md
+++ b/docs/getting_started/installation/google_tpu.md
@@ -85,7 +85,7 @@ gcloud alpha compute tpus queued-resources create QUEUED_RESOURCE_ID \
 | PROJECT_ID         | Your Google Cloud project                                                                                                                                                                                |
 | ZONE               | The GCP zone where you want to create your Cloud TPU. The value you use depends on the version of TPUs you are using. For more information, see [TPU regions and zones]                                  |
 | ACCELERATOR_TYPE   | The TPU version you want to use. Specify the TPU version, for example `v5litepod-4` specifies a v5e TPU with 4 cores, `v6e-1` specifies a v6e TPU with 1 core. For more information, see [TPU versions]. |
-| RUNTIME_VERSION    | The TPU VM runtime version to use. For example, use `v2-alpha-tpuv6e` for a VM loaded with one or more v6e TPU(s). For more information see [TPU VM images].                                             |
+| RUNTIME_VERSION    | The TPU VM runtime version to use. For example, use `v2-alpha-tpuv6e` for a VM loaded with one or more v6e TPU(s).                                              |
 | SERVICE_ACCOUNT    | The email address for your service account. You can find it in the IAM Cloud Console under *Service Accounts*. For example: `tpu-service-account@<your_project_ID>.iam.gserviceaccount.com`              |
 
 Connect to your TPU VM using SSH:
@@ -94,6 +94,9 @@ Connect to your TPU VM using SSH:
 gcloud compute tpus tpu-vm ssh TPU_NAME --project PROJECT_ID --zone ZONE
 ```
 
+!!! note
+    When configuring `RUNTIME_VERSION` ("TPU software version") on GCP, ensure it matches the TPU generation you've selected by referencing the [TPU VM images] compatibility matrix. Using an incompatible version may prevent vLLM from running correctly.
+
 [TPU versions]: https://cloud.google.com/tpu/docs/runtimes
 [TPU VM images]: https://cloud.google.com/tpu/docs/runtimes
 [TPU regions and zones]: https://cloud.google.com/tpu/docs/regions-zones

From 05fae021750be1049927299ea2317d742c03718a Mon Sep 17 00:00:00 2001
From: Benji Beck <benjibeck@meta.com>
Date: Tue, 5 Aug 2025 02:36:18 -0700
Subject: [PATCH 217/224] Migrate KimiVLImagePixelInputs to TensorSchema
 (#21769)

Signed-off-by: Benji Beck <benjibeck@meta.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
---
 vllm/model_executor/models/kimi_vl.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py
index 9c0a6ba92389b..1c7ddd7df7f82 100644
--- a/vllm/model_executor/models/kimi_vl.py
+++ b/vllm/model_executor/models/kimi_vl.py
@@ -46,7 +46,7 @@ import copy
 import math
 from collections.abc import Iterable, Mapping, Sequence
 from dataclasses import dataclass
-from typing import Any, Literal, Optional, TypedDict, Union
+from typing import Annotated, Any, Literal, Optional, Union
 
 import torch
 from torch import nn
@@ -79,6 +79,7 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.configs import KimiVLConfig, MoonViTConfig
 from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekV2Config
+from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .utils import is_pp_missing_parameter, maybe_prefix
 
@@ -118,15 +119,22 @@ class KimiVLMultiModalProjector(nn.Module):
         return hidden_states
 
 
-class KimiVLImagePixelInputs(TypedDict):
-    type: Literal["pixel_values"]
-    pixel_values: Union[torch.Tensor, list[torch.Tensor]]
+class KimiVLImagePixelInputs(TensorSchema):
     """
-    Shape:`(num_patches, num_channels, patch_size, patch_size)`
+    Dimensions:
+        - nc: Number of channels (the shape below fixes this to 3)
+        - np: Number of patches
+        - ps: Patch size
+        - ni: Number of images
     """
+    type: Literal["pixel_values"] = "pixel_values"
 
-    image_grid_hws: torch.Tensor
-    """Shape:`(num_images, 2)`"""
+    pixel_values: Annotated[
+        Union[torch.Tensor, list[torch.Tensor]],
+        TensorShape("np", 3, "ps", "ps"),
+    ]
+
+    image_grid_hws: Annotated[torch.Tensor, TensorShape("ni", 2)]
 
 
 # TODO: support embeds too
@@ -348,8 +356,6 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal):
             pixel_values = pixel_values.reshape(-1, num_channels, patch_size,
                                                 patch_size)
         pixel_values = pixel_values.to(self.vision_tower.dtype)
-        # image_grid_hws.shape = (N, 2)
-        assert image_grid_hws.ndim == 2, f"unexpected shape for image_grid_hws: {image_grid_hws.shape}"
 
         return KimiVLImagePixelInputs(
             type="pixel_values",

From 4771df7b2bd1ed06fbdc564c98e1b86efaff69b3 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Tue, 5 Aug 2025 05:36:43 -0400
Subject: [PATCH 218/224] [Feature] Non-contiguous Support for FP8 Quantization
 (#21961)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
---
 csrc/quantization/fp8/common.cu  | 259 ++++++++++++++++++++-----------
 csrc/quantization/fp8/common.cuh | 107 -------------
 tests/quantization/test_fp8.py   |  33 ++++
 vllm/_custom_ops.py              |   9 +-
 4 files changed, 207 insertions(+), 201 deletions(-)

diff --git a/csrc/quantization/fp8/common.cu b/csrc/quantization/fp8/common.cu
index 0e1eab66f0b98..5fe5dd04bd891 100644
--- a/csrc/quantization/fp8/common.cu
+++ b/csrc/quantization/fp8/common.cu
@@ -1,7 +1,8 @@
 #include "common.cuh"
 #include "dispatch_utils.h"
-
+#include "../vectorization_utils.cuh"
 #include <c10/cuda/CUDAGuard.h>
+#include <ATen/cuda/Exceptions.h>
 
 #ifndef USE_ROCM
   #include <cub/cub.cuh>
@@ -12,74 +13,127 @@
 namespace vllm {
 
 template <typename scalar_t, typename fp8_type>
-__global__ void scaled_fp8_quant_kernel(fp8_type* __restrict__ out,
-                                        const scalar_t* __restrict__ input,
-                                        const float* __restrict__ scale,
-                                        int64_t num_elems) {
-  int tid = blockDim.x * blockIdx.x + threadIdx.x;
+__global__ void scaled_fp8_quant_kernel_strided(
+    fp8_type* __restrict__ out, const scalar_t* __restrict__ input,
+    const float* __restrict__ scale, int hidden_size, int64_t in_row_stride,
+    int64_t out_row_stride) {
+  const int64_t token_idx = blockIdx.x;  // one token per block
+  const int tid = threadIdx.x;
 
-  // Invert the scale so that we can use multiplications to avoid expensive
-  // division.
-  const float inverted_scale = 1.0f / (*scale);
-  scaled_fp8_conversion_vec<scalar_t, true>(
-      out, input, inverted_scale, num_elems, tid, blockDim.x * gridDim.x);
+  const scalar_t* token_in = input + token_idx * in_row_stride;
+  fp8_type* token_out = out + token_idx * out_row_stride;
+
+  const float inv_scale = 1.0f / (*scale);
+
+  vectorize_with_alignment<16>(
+      token_in, token_out, hidden_size, tid, blockDim.x,
+      [=] __device__(fp8_type & dst, const scalar_t& src) {
+        dst = scaled_fp8_conversion<true, fp8_type>(static_cast<float>(src),
+                                                    inv_scale);
+      });
 }
 
 template <typename scalar_t, typename fp8_type>
-__global__ void dynamic_per_token_scaled_fp8_quant_kernel(
-    fp8_type* __restrict__ out, float* __restrict__ scale,
-    scalar_t const* __restrict__ input, float const* __restrict__ scale_ub,
-    const int hidden_size) {
-  int const tid = threadIdx.x;
-  int const token_idx = blockIdx.x;
+__global__ void segmented_max_reduction_strided(
+    float* __restrict__ scale, const scalar_t* __restrict__ input,
+    int hidden_size, int64_t in_row_stride, int64_t num_tokens) {
+  __shared__ float cache[256];
+  const int tid = threadIdx.x;
+  int64_t token_idx = blockIdx.x;
 
-  // Use int64 to avoid overflowing an int32 when calculating this offset
-  int64_t offset = static_cast<int64_t>(token_idx) * hidden_size;
-  scalar_t const* __restrict__ token_input = &input[offset];
-  fp8_type* __restrict__ token_output = &out[offset];
-
-  // For vectorization, token_input and token_output pointers need to be
-  // aligned at 32-byte and 16-byte addresses respectively.
-  bool const can_vectorize = hidden_size % 16 == 0;
-
-  float absmax_val = 0.0f;
-  if (can_vectorize) {
-    absmax_val = thread_max_vec(token_input, hidden_size, tid, blockDim.x);
-  } else {
-    for (int i = tid; i < hidden_size; i += blockDim.x) {
-      float const x = static_cast<float>(token_input[i]);
-      absmax_val = fmaxf(absmax_val, fabsf(x));
-    }
+  // one block per token. Guard in case gridDim.x > num_tokens.
+  if (token_idx >= num_tokens) {
+    return;
   }
 
+  const scalar_t* row_ptr = input + token_idx * in_row_stride;
+
+  // each thread scans elements of the row in a strided fashion.
+  float thread_max = 0.0f;
+  for (int e = tid; e < hidden_size; e += blockDim.x) {
+    float v = fabsf(static_cast<float>(row_ptr[e]));
+    thread_max = fmaxf(thread_max, v);
+  }
+
+  cache[tid] = thread_max;
+  __syncthreads();
+
+  // parallel reduction to find row max.
+  for (int offset = blockDim.x / 2; offset > 0; offset >>= 1) {
+    if (tid < offset) {
+      cache[tid] = fmaxf(cache[tid], cache[tid + offset]);
+    }
+    __syncthreads();
+  }
+
+  // thread 0 atomically maxes the per-tensor scale; *scale must start <= 0.
+  if (tid == 0) {
+    atomicMaxFloat(scale, cache[0] / quant_type_max_v<fp8_type>);
+  }
+}
+
+template <typename scalar_t, typename fp8_type>
+__global__ void scaled_fp8_quant_kernel_strided_dynamic(
+    fp8_type* __restrict__ out, const scalar_t* __restrict__ input,
+    const float* __restrict__ scale, int hidden_size, int64_t in_row_stride,
+    int64_t out_row_stride) {  // quantize one row per block with *scale
+  const int64_t token_idx = blockIdx.x;  // row handled by this block
+  const int tid = threadIdx.x;
+  // Row bases use the row strides (in elements), not hidden_size.
+  const scalar_t* token_in = input + token_idx * in_row_stride;
+  fp8_type* token_out = out + token_idx * out_row_stride;
+  // Pass 1/scale; template arg `true` presumably flags it as inverted.
+  const float reciprocal_scale = 1.0f / (*scale);
+  vectorize_with_alignment<16>(
+      token_in, token_out, hidden_size, tid, blockDim.x,
+      [=] __device__(fp8_type & dst, const scalar_t& src) {
+        dst = scaled_fp8_conversion<true, fp8_type>(static_cast<float>(src),
+                                                    reciprocal_scale);
+      });
+}
+
+template <typename scalar_t, typename fp8_type>
+__global__ void dynamic_per_token_scaled_fp8_quant_kernel_strided(
+    fp8_type* __restrict__ out, float* __restrict__ scale,
+    const scalar_t* __restrict__ input, const float* __restrict__ scale_ub,
+    int hidden_size, int64_t in_row_stride, int64_t out_row_stride) {
+  const int64_t token_idx = blockIdx.x;
+  const int tid = threadIdx.x;
+
+  // Use int64 to avoid overflowing an int32 when calculating this offset
+  int64_t in_offset = static_cast<int64_t>(token_idx) * in_row_stride;
+  int64_t out_offset = static_cast<int64_t>(token_idx) * out_row_stride;
+  const scalar_t* token_in = input + in_offset;
+  fp8_type* token_out = out + out_offset;
+
+  // 1) per-token absmax
+  float absmax_val = 0.f;
+  vectorize_read_with_alignment<16>(
+      token_in, hidden_size, tid, blockDim.x, [&] __device__(scalar_t v) {
+        absmax_val = fmaxf(absmax_val, fabsf(static_cast<float>(v)));
+      });
+
   using BlockReduce = cub::BlockReduce<float, 256>;
-  __shared__ typename BlockReduce::TempStorage reduceStorage;
-  float const block_absmax_val_maybe =
-      BlockReduce(reduceStorage).Reduce(absmax_val, cub::Max{}, blockDim.x);
+  __shared__ typename BlockReduce::TempStorage tmp;
+  const float block_max =
+      BlockReduce(tmp).Reduce(absmax_val, cub::Max{}, blockDim.x);
+
   __shared__ float token_scale;
   if (tid == 0) {
-    if (scale_ub) {
-      token_scale = fminf(block_absmax_val_maybe, *scale_ub);
-    } else {
-      token_scale = block_absmax_val_maybe;
-    }
-    // token scale computation
+    token_scale = scale_ub ? fminf(block_max, *scale_ub) : block_max;
     token_scale = fmaxf(token_scale / quant_type_max_v<fp8_type>,
                         min_scaling_factor<fp8_type>::val());
     scale[token_idx] = token_scale;
   }
   __syncthreads();
 
-  // Note that we don't use inverted scales so we can match FBGemm impl.
-  if (can_vectorize) {
-    scaled_fp8_conversion_vec<scalar_t, false>(
-        token_output, token_input, token_scale, hidden_size, tid, blockDim.x);
-  } else {
-    for (int i = tid; i < hidden_size; i += blockDim.x) {
-      token_output[i] = scaled_fp8_conversion<false, fp8_type>(
-          static_cast<float>(token_input[i]), token_scale);
-    }
-  }
+  // 2) quantize
+  vectorize_with_alignment<16>(
+      token_in, token_out, hidden_size, tid, blockDim.x,
+      [=] __device__(fp8_type & dst, const scalar_t& src) {
+        dst = scaled_fp8_conversion<false, fp8_type>(static_cast<float>(src),
+                                                     token_scale);
+      });
 }
 
 }  // namespace vllm
@@ -88,23 +142,31 @@ void static_scaled_fp8_quant(torch::Tensor& out,          // [..., d]
                              torch::Tensor const& input,  // [..., d]
                              torch::Tensor const& scale)  // [1]
 {
-  TORCH_CHECK(input.is_contiguous());
-  TORCH_CHECK(out.is_contiguous());
-  int const block_size = 256;
-  int const num_tokens = input.numel() / input.size(-1);
-  int const num_elems = input.numel();
-  dim3 const grid(num_tokens);
-  dim3 const block(block_size);
+  TORCH_CHECK(input.stride(-1) == 1,
+              "last dimension of input must be contiguous");
+  TORCH_CHECK(out.stride(-1) == 1,
+              "last dimension of output must be contiguous");
+
+  const int hidden_size = input.size(-1);
+  const int num_tokens = input.numel() / hidden_size;
+  const int block_size = 256;
+  dim3 grid(num_tokens);
+  dim3 block(block_size);
+
+  const int64_t in_row_stride = input.stride(-2);
+  const int64_t out_row_stride = out.stride(-2);
+
   const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   VLLM_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "scaled_fp8_quant_kernel_scalar_type", [&] {
         VLLM_DISPATCH_FP8_TYPES(
             out.scalar_type(), "scaled_fp8_quant_kernel_fp8_type", [&] {
-              vllm::scaled_fp8_quant_kernel<scalar_t, fp8_t>
+              vllm::scaled_fp8_quant_kernel_strided<scalar_t, fp8_t>
                   <<<grid, block, 0, stream>>>(
                       out.data_ptr<fp8_t>(), input.data_ptr<scalar_t>(),
-                      scale.data_ptr<float>(), num_elems);
+                      scale.data_ptr<float>(), hidden_size, in_row_stride,
+                      out_row_stride);
             });
       });
 }
@@ -113,27 +175,42 @@ void dynamic_scaled_fp8_quant(torch::Tensor& out,          // [..., d]
                               torch::Tensor const& input,  // [..., d]
                               torch::Tensor& scale)        // [1]
 {
-  TORCH_CHECK(input.is_contiguous());
-  TORCH_CHECK(out.is_contiguous());
-  int const block_size = 256;
-  int const num_tokens = input.numel() / input.size(-1);
-  int const num_elems = input.numel();
-  dim3 const grid(num_tokens);
-  dim3 const block(block_size);
+  TORCH_CHECK(input.stride(-1) == 1,
+              "last dimension of input must be contiguous");
+  TORCH_CHECK(out.stride(-1) == 1,
+              "last dimension of output must be contiguous");
+
+  const int hidden_size = input.size(-1);
+  const int num_tokens = input.numel() / hidden_size;
+  const int block_size = 256;
+  dim3 grid(num_tokens);
+  dim3 block(block_size);
+
+  const int64_t in_row_stride = input.stride(-2);
+  const int64_t out_row_stride = out.stride(-2);
+
   const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // zero *scale first: the atomic-max reduction below can only raise it
+  AT_CUDA_CHECK(
+      cudaMemsetAsync(scale.data_ptr<float>(), 0, sizeof(float), stream));
+
   VLLM_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "scaled_fp8_quant_kernel_scalar_type", [&] {
         VLLM_DISPATCH_FP8_TYPES(
             out.scalar_type(), "scaled_fp8_quant_kernel_fp8_type", [&] {
-              vllm::segmented_max_reduction<scalar_t, fp8_t>
-                  <<<grid, block, 0, stream>>>(scale.data_ptr<float>(),
-                                               input.data_ptr<scalar_t>(),
-                                               num_elems);
-              vllm::scaled_fp8_quant_kernel<scalar_t, fp8_t>
+              vllm::segmented_max_reduction_strided<scalar_t, fp8_t>
+                  <<<grid, block, 0, stream>>>(
+                      scale.data_ptr<float>(), input.data_ptr<scalar_t>(),
+                      hidden_size, in_row_stride,
+                      static_cast<int64_t>(num_tokens));
+
+              vllm::scaled_fp8_quant_kernel_strided_dynamic<scalar_t, fp8_t>
                   <<<grid, block, 0, stream>>>(
                       out.data_ptr<fp8_t>(), input.data_ptr<scalar_t>(),
-                      scale.data_ptr<float>(), num_elems);
+                      scale.data_ptr<float>(), hidden_size, in_row_stride,
+                      out_row_stride);
             });
       });
 }
@@ -142,14 +219,19 @@ void dynamic_per_token_scaled_fp8_quant(
     torch::Tensor& out,          // [..., d]
     torch::Tensor const& input,  // [..., d]
     torch::Tensor& scales, std::optional<at::Tensor> const& scale_ub) {
-  TORCH_CHECK(input.is_contiguous());
-  TORCH_CHECK(out.is_contiguous());
+  TORCH_CHECK(input.stride(-1) == 1,
+              "last dimension of input must be contiguous");
+  TORCH_CHECK(out.stride(-1) == 1,
+              "last dimension of output must be contiguous");
 
-  int const hidden_size = input.size(-1);
-  int const num_tokens = input.numel() / hidden_size;
-  int const block_size = 256;
-  dim3 const grid(num_tokens);
-  dim3 const block(std::min(hidden_size, block_size));
+  const int hidden_size = input.size(-1);
+  const int num_tokens = input.numel() / hidden_size;
+  const int block_size = 256;
+  dim3 grid(num_tokens);
+  dim3 block(std::min(hidden_size, block_size));
+
+  const int64_t in_row_stride = input.stride(-2);
+  const int64_t out_row_stride = out.stride(-2);
 
   const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
@@ -159,13 +241,12 @@ void dynamic_per_token_scaled_fp8_quant(
         VLLM_DISPATCH_FP8_TYPES(
             out.scalar_type(),
             "dynamic_per_token_scaled_fp8_quant_kernel_fp8_type", [&] {
-              vllm::dynamic_per_token_scaled_fp8_quant_kernel<scalar_t, fp8_t>
-                  <<<grid, block, 0, stream>>>(
-                      out.data_ptr<fp8_t>(), scales.data_ptr<float>(),
-                      input.data_ptr<scalar_t>(),
-                      scale_ub.has_value() ? scale_ub->data_ptr<float>()
-                                           : nullptr,
-                      hidden_size);
+              vllm::dynamic_per_token_scaled_fp8_quant_kernel_strided<
+                  scalar_t, fp8_t><<<grid, block, 0, stream>>>(
+                  out.data_ptr<fp8_t>(), scales.data_ptr<float>(),
+                  input.data_ptr<scalar_t>(),
+                  scale_ub.has_value() ? scale_ub->data_ptr<float>() : nullptr,
+                  hidden_size, in_row_stride, out_row_stride);
             });
       });
 }
diff --git a/csrc/quantization/fp8/common.cuh b/csrc/quantization/fp8/common.cuh
index d36f94a8f10d6..1aad6330c44b8 100644
--- a/csrc/quantization/fp8/common.cuh
+++ b/csrc/quantization/fp8/common.cuh
@@ -55,111 +55,4 @@ __device__ __forceinline__ fp8_type scaled_fp8_conversion(float const val,
 #endif
 }
 
-// Compute the absolute maximum m of the input tensor and store
-// m / float8_e4m3::max() in *scale. Each thread block performs a
-// reduction tree and the memory in scale is atomically updated.
-// So to get the right answer, *scale needs to be initialized to
-// a value <= 0.0 and we need to wait for all thread blocks to
-// finish before consuming *scale.
-template <typename scalar_t, typename fp8_type>
-__global__ void segmented_max_reduction(float* __restrict__ scale,
-                                        const scalar_t* __restrict__ input,
-                                        int64_t num_elems) {
-  __shared__ float cache[256];
-  int64_t i = blockDim.x * blockIdx.x + threadIdx.x;
-
-  // First store maximum for all values processes by
-  // the current thread in cache[threadIdx.x]
-  scalar_t tmp = 0.0;
-  while (i < num_elems) {
-    float x = static_cast<float>(input[i]);
-    tmp = fmaxf(tmp, fabsf(x));
-    i += blockDim.x * gridDim.x;
-  }
-  cache[threadIdx.x] = tmp;
-
-  __syncthreads();
-
-  // Now perform parallel reduction within the thread block
-  int ib = blockDim.x / 2;
-  while (ib != 0) {
-    if (threadIdx.x < ib && cache[threadIdx.x + ib] > cache[threadIdx.x]) {
-      cache[threadIdx.x] = cache[threadIdx.x + ib];
-    }
-    __syncthreads();
-    ib /= 2;
-  }
-  // Finally, since cache[0] contains the maximum for this thread block,
-  // atomically write the max to the target location
-  if (threadIdx.x == 0) {
-    atomicMaxFloat(scale, cache[0] / quant_type_max_v<fp8_type>);
-  }
-}
-
-template <typename scalar_t>
-__device__ float thread_max_vec(scalar_t const* __restrict__ input,
-                                int64_t const num_elems, int const tid,
-                                int const step) {
-  constexpr size_t VEC_SIZE = 16;
-  using scalarxN_t = vec_n_t<scalar_t, VEC_SIZE>;
-  // Vectorized input/output to better utilize memory bandwidth.
-  auto const* vectorized_in = reinterpret_cast<scalarxN_t const*>(input);
-
-  // num_elems / VEC_SIZE (which is 16)
-  int64_t const num_vec_elems = num_elems >> 4;
-  float absmax_val = 0.0f;
-
-#pragma unroll
-  for (int64_t i = tid; i < num_vec_elems; i += step) {
-    scalarxN_t in_vec = vectorized_in[i];
-#pragma unroll
-    for (int j = 0; j < VEC_SIZE; ++j) {
-      absmax_val = fmaxf(absmax_val, fabsf(in_vec.val[j]));
-    }
-  }
-
-  // Handle the remaining elements if num_elems is not divisible by VEC_SIZE
-  for (int64_t i = num_vec_elems * VEC_SIZE + tid; i < num_elems; i += step) {
-    absmax_val = fmaxf(absmax_val, fabsf(input[i]));
-  }
-
-  return absmax_val;
-}
-
-template <typename scalar_t, bool is_scale_inverted, typename fp8_type>
-__device__ void scaled_fp8_conversion_vec(fp8_type* __restrict__ out,
-                                          scalar_t const* __restrict__ input,
-                                          float const scale,
-                                          int64_t const num_elems,
-                                          int const tid, int const step) {
-  constexpr size_t VEC_SIZE = 16;
-  using scalarxN_t = vec_n_t<scalar_t, VEC_SIZE>;
-  using float8xN_t = q8_n_t<fp8_type, VEC_SIZE>;
-  // Vectorized input/output to better utilize memory bandwidth.
-  auto const* vectorized_in = reinterpret_cast<scalarxN_t const*>(input);
-  auto* vectorized_out = reinterpret_cast<float8xN_t*>(out);
-
-  // num_elems / VEC_SIZE (which is 16)
-  int64_t const num_vec_elems = num_elems >> 4;
-
-#pragma unroll
-  for (int64_t i = tid; i < num_vec_elems; i += step) {
-    scalarxN_t in_vec = vectorized_in[i];
-    float8xN_t out_vec;
-
-#pragma unroll
-    for (int j = 0; j < VEC_SIZE; ++j) {
-      out_vec.val[j] = scaled_fp8_conversion<is_scale_inverted, fp8_type>(
-          static_cast<float>(in_vec.val[j]), scale);
-    }
-    vectorized_out[i] = out_vec;
-  }
-
-  // Handle the remaining elements if num_elems is not divisible by VEC_SIZE
-  for (int64_t i = num_vec_elems * VEC_SIZE + tid; i < num_elems; i += step) {
-    out[i] = scaled_fp8_conversion<is_scale_inverted, fp8_type>(
-        static_cast<float>(input[i]), scale);
-  }
-}
-
 }  // namespace vllm
diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py
index e5ab7b3dd3cfb..0b37c83c92c2a 100644
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -194,3 +194,36 @@ def test_scaled_fp8_quant(dtype) -> None:
         ref_y,
         per_tensor_dequantize(torch.narrow(y, 0, 0, x.shape[0]), inv_scale,
                               dtype))
+
+    # non-contiguous input with padding
+    m, n, padded_stride = 975, 512, 576
+    padded_tensor = (torch.randn(size=(m, padded_stride), device="cuda") *
+                     13).to(dtype)
+    x_nc = padded_tensor[:, :n]  # shape (m, n) with stride (padded_stride, 1)
+
+    assert not x_nc.is_contiguous()
+    assert x_nc.stride(0) == padded_stride
+
+    # dynamic quantization
+    ref_y_nc, inv_scale_nc = ops.scaled_fp8_quant(x_nc, None)
+    ref_y_nc = per_tensor_dequantize(ref_y_nc, inv_scale_nc, dtype)
+
+    # reference dynamic quantization
+    y_nc = quantize_ref(x_nc, inv_scale_nc)
+    torch.testing.assert_close(
+        ref_y_nc, per_tensor_dequantize(y_nc, inv_scale_nc, dtype))
+
+    # static quantization
+    y_nc, _ = ops.scaled_fp8_quant(x_nc, inv_scale_nc)
+    torch.testing.assert_close(
+        ref_y_nc, per_tensor_dequantize(y_nc, inv_scale_nc, dtype))
+
+    # padding after non-contiguous input quantization
+    y_nc_pad, _ = ops.scaled_fp8_quant(x_nc,
+                                       inv_scale_nc,
+                                       num_token_padding=m + 10)
+    assert y_nc_pad.shape[0] == m + 10
+    torch.testing.assert_close(
+        ref_y_nc,
+        per_tensor_dequantize(torch.narrow(y_nc_pad, 0, 0, x_nc.shape[0]),
+                              inv_scale_nc, dtype))
diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 35345b1be01c2..e6f69e2344efa 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1279,14 +1279,13 @@ def scaled_fp8_quant(
                                 device=input.device,
                                 dtype=torch.float32)
             torch.ops._C.dynamic_per_token_scaled_fp8_quant(
-                output, input.contiguous(), scale, scale_ub)
+                output, input, scale, scale_ub)
         else:
-            scale = torch.zeros(1, device=input.device, dtype=torch.float32)
-            torch.ops._C.dynamic_scaled_fp8_quant(output, input.contiguous(),
-                                                  scale)
+            scale = torch.empty(1, device=input.device, dtype=torch.float32)
+            torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
     else:
         assert scale.numel() == 1, f"{scale.shape}"
-        torch.ops._C.static_scaled_fp8_quant(output, input.contiguous(), scale)
+        torch.ops._C.static_scaled_fp8_quant(output, input, scale)
 
     return output, scale
 

From 83156c7b89fb880744216f3475c99f698d67a4dc Mon Sep 17 00:00:00 2001
From: elvischenv <219235043+elvischenv@users.noreply.github.com>
Date: Tue, 5 Aug 2025 17:45:34 +0800
Subject: [PATCH 219/224] [NVIDIA] Support Flashinfer TRT-LLM Prefill Attention
 Kernel (#22095)

Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com>
---
 .buildkite/test-pipeline.yaml                 |   2 +-
 ...y => benchmark_trtllm_decode_attention.py} |   1 -
 .../benchmark_trtllm_prefill_attention.py     | 250 +++++++++++++++
 .../test_flashinfer_trtllm_attention.py       | 293 ++++++++++++++++++
 ...test_flashinfer_trtllm_decode_attention.py | 138 ---------
 vllm/attention/backends/flashinfer.py         |   4 +-
 vllm/envs.py                                  |   6 +-
 vllm/utils/flashinfer.py                      |  17 +-
 vllm/v1/attention/backends/flashinfer.py      | 223 ++++++++-----
 9 files changed, 700 insertions(+), 234 deletions(-)
 rename benchmarks/kernels/{benchmark_trtllm_attention.py => benchmark_trtllm_decode_attention.py} (99%)
 create mode 100644 benchmarks/kernels/benchmark_trtllm_prefill_attention.py
 create mode 100644 tests/kernels/attention/test_flashinfer_trtllm_attention.py
 delete mode 100644 tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index b7a2ca6ca9b24..e139c6b30586e 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -664,7 +664,7 @@ steps:
     # Attention
     # num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353
     - pytest -v -s tests/kernels/attention/test_flashinfer.py -k 'not num_heads2'
-    - pytest -v -s tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py
+    - pytest -v -s tests/kernels/attention/test_flashinfer_trtllm_attention.py
     - pytest -v -s tests/kernels/test_cutlass_mla_decode.py
     # Quantization
     - pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8'
diff --git a/benchmarks/kernels/benchmark_trtllm_attention.py b/benchmarks/kernels/benchmark_trtllm_decode_attention.py
similarity index 99%
rename from benchmarks/kernels/benchmark_trtllm_attention.py
rename to benchmarks/kernels/benchmark_trtllm_decode_attention.py
index 68c48858e61cc..77136edca45b5 100644
--- a/benchmarks/kernels/benchmark_trtllm_attention.py
+++ b/benchmarks/kernels/benchmark_trtllm_decode_attention.py
@@ -41,7 +41,6 @@ def benchmark_decode(
     device = "cuda"
     torch.manual_seed(0)
 
-    # Currently only HEAD_GRP_SIZE == 8 is supported
     HEAD_GRP_SIZE = 8
     MAX_SEQ_LEN = max_seq_len
 
diff --git a/benchmarks/kernels/benchmark_trtllm_prefill_attention.py b/benchmarks/kernels/benchmark_trtllm_prefill_attention.py
new file mode 100644
index 0000000000000..67bd9aebbcca9
--- /dev/null
+++ b/benchmarks/kernels/benchmark_trtllm_prefill_attention.py
@@ -0,0 +1,250 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import csv
+import os
+import random
+from datetime import datetime
+
+import flashinfer
+import torch
+
+FLOAT32_BYTES = torch.finfo(torch.float).bits // 8
+
+# KV Cache Layout for TRT-LLM
+# kv_cache_shape = (num_blocks, 2, num_kv_heads, page_size, head_dim)
+
+
+def to_float8(x, dtype=torch.float8_e4m3fn):
+    finfo = torch.finfo(dtype)
+    min_val, max_val = x.aminmax()
+    amax = torch.maximum(min_val.abs(), max_val.abs()).clamp(min=1e-12)
+    scale = finfo.max / amax * 0.1
+    x_scl_sat = (x * scale).clamp(min=finfo.min, max=finfo.max)
+    return x_scl_sat.to(dtype), scale.float().reciprocal()
+
+
+@torch.no_grad()
+def benchmark_prefill(
+    num_seqs,
+    max_seq_len,
+    page_size=16,
+    dtype=torch.bfloat16,
+    kv_layout="HND",
+    num_kv_heads=8,
+    kv_cache_dtype="auto",
+    head_dim=128,
+    warmup=10,
+    trials=20,
+):
+    torch.set_default_device("cuda")
+    torch.manual_seed(0)
+
+    HEAD_GRP_SIZE = 8
+    MAX_SEQ_LEN = max_seq_len
+
+    # large number to reduce kv_cache reuse
+    NUM_BLOCKS = int(256000 / page_size)
+
+    workspace_buffer = torch.empty(1024 * 1024 * 1024, dtype=torch.int8)
+
+    num_qo_heads = num_kv_heads * HEAD_GRP_SIZE
+    sm_scale = float(1.0 / (head_dim**0.5))
+
+    q_lens = [random.randint(1, MAX_SEQ_LEN) for _ in range(num_seqs)]
+    q_lens[-1] = MAX_SEQ_LEN
+    max_q_len = max(q_lens)
+    q_indptr = torch.cat(
+        [
+            torch.tensor([0], dtype=torch.int32),
+            torch.cumsum(
+                torch.tensor(q_lens, dtype=torch.int32), dim=0, dtype=torch.int32
+            ),
+        ]
+    )
+    q = torch.randn(sum(q_lens), num_qo_heads, head_dim, dtype=dtype)
+
+    kv_lens = [random.randint(0, MAX_SEQ_LEN) for _ in range(num_seqs)]
+    kv_lens[-1] = MAX_SEQ_LEN
+
+    seq_lens = [q_len + kv_len for q_len, kv_len in zip(q_lens, kv_lens)]
+    max_seq_len = max(seq_lens)
+    seq_lens_tensor = torch.tensor(seq_lens, dtype=torch.int32)
+
+    max_num_blocks_per_seq = (max_seq_len + page_size - 1) // page_size
+    block_tables = torch.randint(
+        0, NUM_BLOCKS, (num_seqs, max_num_blocks_per_seq), dtype=torch.int32
+    )
+
+    kv_cache_shape = (NUM_BLOCKS, 2, num_kv_heads, page_size, head_dim)
+    kv_cache = torch.randn(size=kv_cache_shape, dtype=dtype)
+    k_scale = v_scale = 1.0
+
+    if kv_cache_dtype.startswith("fp8"):
+        kv_cache, _ = to_float8(kv_cache)
+
+    output_trtllm = torch.empty(q.shape, dtype=dtype)
+
+    kv_indptr = [0]
+    kv_indices = []
+    kv_last_page_lens = []
+    for i in range(num_seqs):
+        seq_len = seq_lens[i]
+        assert seq_len > 0
+        num_blocks = (seq_len + page_size - 1) // page_size
+        kv_indices.extend(block_tables[i, :num_blocks])
+        kv_indptr.append(kv_indptr[-1] + num_blocks)
+        kv_last_page_len = seq_len % page_size
+        if kv_last_page_len == 0:
+            kv_last_page_len = page_size
+        kv_last_page_lens.append(kv_last_page_len)
+
+    kv_indptr = torch.tensor(kv_indptr, dtype=torch.int32)
+    kv_indices = torch.tensor(kv_indices, dtype=torch.int32)
+    kv_last_page_lens = torch.tensor(kv_last_page_lens, dtype=torch.int32)
+
+    output_baseline = torch.empty(q.shape, dtype=dtype)
+
+    wrapper = flashinfer.BatchPrefillWithPagedKVCacheWrapper(
+        workspace_buffer, kv_layout
+    )
+    wrapper.plan(
+        q_indptr,
+        kv_indptr,
+        kv_indices,
+        kv_last_page_lens,
+        num_qo_heads,
+        num_kv_heads,
+        head_dim,
+        page_size,
+        causal=True,
+        sm_scale=sm_scale,
+        q_data_type=dtype,
+        kv_data_type=kv_cache.dtype,
+    )
+
+    def time_fn(fn, warmup=10, trials=20):
+        torch.cuda.synchronize()
+        start = torch.cuda.Event(enable_timing=True)
+        end = torch.cuda.Event(enable_timing=True)
+        times = []
+        for i in range(warmup):
+            fn()
+        for i in range(trials):
+            start.record()
+            fn()
+            end.record()
+            torch.cuda.synchronize()
+            times.append(start.elapsed_time(end))  # ms
+        return sum(times) / len(times), torch.std(torch.tensor(times))
+
+    def baseline_prefill():
+        return wrapper.run(
+            q, kv_cache, k_scale=k_scale, v_scale=v_scale, out=output_baseline
+        )
+
+    def trt_prefill():
+        return flashinfer.prefill.trtllm_batch_context_with_kv_cache(
+            query=q,
+            kv_cache=kv_cache,
+            workspace_buffer=workspace_buffer,
+            block_tables=block_tables,
+            seq_lens=seq_lens_tensor,
+            max_q_len=max_q_len,
+            max_kv_len=max_seq_len,
+            bmm1_scale=k_scale * sm_scale,
+            bmm2_scale=v_scale,
+            batch_size=num_seqs,
+            cum_seq_lens_q=q_indptr,
+            cum_seq_lens_kv=kv_indptr,
+            out=output_trtllm,
+        )
+
+    trt_mean, trt_std = time_fn(trt_prefill)
+    baseline_mean, baseline_std = time_fn(baseline_prefill)
+
+    # Calculate fractional speedup (positive means TRT is faster)
+    speedup_percent = (baseline_mean - trt_mean) / baseline_mean
+
+    print(
+        f"\t{num_seqs}\t{max_seq_len}\t{trt_mean:.5f}\t{trt_std.item():.5f}"
+        f"\t{baseline_mean:.5f}\t{baseline_std.item():.5f}\t{speedup_percent:.5f}"
+    )
+
+    # Return results for CSV writing
+    return {
+        "num_seqs": num_seqs,
+        "trt_mean": trt_mean,
+        "trt_std": trt_std.item(),
+        "baseline_mean": baseline_mean,
+        "baseline_std": baseline_std.item(),
+        "speedup_percent": speedup_percent,
+        "q_dtype": str(dtype),
+        "kv_cache_dtype": kv_cache_dtype,
+        "page_size": page_size,
+        "num_kv_heads": num_kv_heads,
+        "head_dim": head_dim,
+        "max_seq_len": max_seq_len,
+    }
+
+
+def write_results_to_csv(results, filename=None):
+    """Write benchmark results to CSV file."""
+    if filename is None:
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"flashinfer_trtllm_benchmark_{timestamp}.csv"
+
+    fieldnames = [
+        "num_seqs",
+        "trt_mean",
+        "trt_std",
+        "baseline_mean",
+        "baseline_std",
+        "speedup_percent",
+        "q_dtype",
+        "kv_cache_dtype",
+        "page_size",
+        "num_kv_heads",
+        "head_dim",
+        "max_seq_len",
+    ]
+
+    file_exists = os.path.exists(filename)
+
+    with open(filename, "a", newline="") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+        if not file_exists:
+            writer.writeheader()
+
+        for result in results:
+            writer.writerow(result)
+
+    print(f"Results written to {filename}")
+
+
+if __name__ == "__main__":
+    num_seqs = [1, 4, 8, 16, 32, 64, 128, 256]
+    max_seq_lens = [1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072]
+    all_results = []
+
+    print(
+        "Running benchmark for q_dtype = bfloat16, kv_cache_dtype: bfloat16, "
+        "output_dtype: bfloat16"
+    )
+    print(
+        "\tnum_seqs\tmax_seq_len\ttrt_mean\ttrt_std\tbaseline_mean\t"
+        "baseline_std\tspeedup_percent"
+    )
+    for max_seq_len in max_seq_lens:
+        for bs in num_seqs:
+            result = benchmark_prefill(
+                bs,
+                max_seq_len,
+                dtype=torch.bfloat16,
+                kv_cache_dtype="auto",
+            )
+            all_results.append(result)
+
+    # Write all results to CSV
+    write_results_to_csv(all_results)
diff --git a/tests/kernels/attention/test_flashinfer_trtllm_attention.py b/tests/kernels/attention/test_flashinfer_trtllm_attention.py
new file mode 100644
index 0000000000000..e87ce520bc66b
--- /dev/null
+++ b/tests/kernels/attention/test_flashinfer_trtllm_attention.py
@@ -0,0 +1,293 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Optional
+
+import flashinfer
+import pytest
+import torch
+
+from vllm.platforms import current_platform
+
+if not current_platform.is_device_capability(100):
+    pytest.skip("This TRTLLM kernel requires NVIDIA Blackwell.",
+                allow_module_level=True)
+
+FLOAT32_BYTES = torch.finfo(torch.float).bits // 8
+
+# KV Cache Layout for TRT-LLM
+# kv_cache_shape = (num_blocks, 2, num_kv_heads, page_size, head_dim)
+
+MAX_Q_LEN = 1024
+MAX_KV_LEN = 4096
+BATCH_SIZES = [4, 12]
+NUM_HEADS = [(64, 8), (16, 16), (40, 8), (32, 8)]
+HEAD_SIZES = [128]
+BLOCK_SIZES = [16, 32]
+KV_LAYOUTS = ["HND"]
+DTYPES = [torch.float16, torch.bfloat16]
+KV_CACHE_DTYPES = [None, current_platform.fp8_dtype()]
+NUM_BLOCKS = 32768  # Large enough to test overflow in index calculation.
+SOFT_CAPS = [None, 50.0]
+
+
+def to_float8(x, dtype=torch.float8_e4m3fn):
+    finfo = torch.finfo(dtype)
+    min_val, max_val = x.aminmax()
+    amax = torch.maximum(min_val.abs(), max_val.abs()).clamp(min=1e-12)
+    scale = finfo.max / amax * 0.1
+    x_scl_sat = (x * scale).clamp(min=finfo.min, max=finfo.max)
+    return x_scl_sat.to(dtype), scale.float().reciprocal()
+
+
+@pytest.mark.parametrize("batch_size", BATCH_SIZES)
+@pytest.mark.parametrize("num_heads", NUM_HEADS)
+@pytest.mark.parametrize("head_size", HEAD_SIZES)
+@pytest.mark.parametrize("block_size", BLOCK_SIZES)
+@pytest.mark.parametrize("kv_layout", KV_LAYOUTS)
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("kv_cache_dtype", KV_CACHE_DTYPES)
+@pytest.mark.parametrize("soft_cap", SOFT_CAPS)
+@torch.inference_mode
+def test_flashinfer_trtllm_decode_with_baseline(
+    batch_size: int,
+    num_heads: tuple[int, int],
+    head_size: int,
+    block_size: int,
+    kv_layout: str,
+    dtype: torch.dtype,
+    kv_cache_dtype: Optional[torch.dtype],
+    soft_cap: Optional[float],
+) -> None:
+    kv_cache_dtype = dtype if kv_cache_dtype is None else kv_cache_dtype
+
+    torch.set_default_device("cuda")
+    current_platform.seed_everything(0)
+
+    kv_lens = torch.randint(1, MAX_KV_LEN, (batch_size, ), dtype=torch.int32)
+    kv_lens[-1] = MAX_KV_LEN
+    max_kv_len = torch.max(kv_lens).item()
+    num_seqs = len(kv_lens)
+
+    num_query_heads = num_heads[0]
+    num_kv_heads = num_heads[1]
+    assert num_query_heads % num_kv_heads == 0
+
+    scale = head_size**-0.5
+
+    query = torch.randn(num_seqs, num_query_heads, head_size, dtype=dtype)
+
+    kv_cache_shape = None
+    if kv_layout == "NHD":
+        kv_cache_shape = (NUM_BLOCKS, 2, block_size, num_kv_heads, head_size)
+    elif kv_layout == "HND":
+        kv_cache_shape = (NUM_BLOCKS, 2, num_kv_heads, block_size, head_size)
+    else:
+        raise ValueError(f"Invalid kv_layout: {kv_layout}")
+    key_value_cache = torch.randn(kv_cache_shape, dtype=dtype)
+    kv_scale = 1.0
+    if kv_cache_dtype is current_platform.fp8_dtype():
+        key_value_cache, kv_scale = to_float8(key_value_cache,
+                                              current_platform.fp8_dtype())
+
+    max_num_blocks_per_seq = (max_kv_len + block_size - 1) // block_size
+    block_tables = torch.randint(0,
+                                 NUM_BLOCKS,
+                                 (num_seqs, max_num_blocks_per_seq),
+                                 dtype=torch.int32)
+    k_scale = v_scale = kv_scale
+    kv_indptr = [0]
+    kv_indices = []
+    kv_last_page_lens = []
+    for i in range(num_seqs):
+        seq_len = kv_lens[i]
+        assert seq_len > 0
+        num_blocks = (seq_len + block_size - 1) // block_size
+        kv_indices.extend(block_tables[i, :num_blocks])
+        kv_indptr.append(kv_indptr[-1] + num_blocks)
+        kv_last_page_len = seq_len % block_size
+        if kv_last_page_len == 0:
+            kv_last_page_len = block_size
+        kv_last_page_lens.append(kv_last_page_len)
+
+    kv_indptr = torch.tensor(kv_indptr, dtype=torch.int32)
+    kv_indices = torch.tensor(kv_indices, dtype=torch.int32)
+    kv_last_page_lens = torch.tensor(kv_last_page_lens, dtype=torch.int32)
+
+    workspace_buffer = torch.empty(128 * 1024 * 1024, dtype=torch.int8)
+    wrapper = flashinfer.BatchDecodeWithPagedKVCacheWrapper(
+        workspace_buffer,
+        kv_layout,
+        use_tensor_cores=((num_query_heads // num_kv_heads) > 4))
+    wrapper.plan(kv_indptr,
+                 kv_indices,
+                 kv_last_page_lens,
+                 num_query_heads,
+                 num_kv_heads,
+                 head_size,
+                 block_size,
+                 "NONE",
+                 sm_scale=scale,
+                 q_data_type=dtype,
+                 kv_data_type=kv_cache_dtype,
+                 logits_soft_cap=soft_cap)
+
+    output = torch.empty(query.shape, dtype=dtype)
+    wrapper.run(query,
+                key_value_cache,
+                k_scale=k_scale,
+                v_scale=v_scale,
+                out=output)
+
+    # TRTLLM Decode
+    kv_lens_tensor = torch.tensor(kv_lens, dtype=torch.int32)
+    output_trtllm = torch.empty(query.shape, dtype=dtype)
+    flashinfer.decode.trtllm_batch_decode_with_kv_cache(
+        query=query.contiguous(),
+        kv_cache=key_value_cache,
+        workspace_buffer=workspace_buffer,
+        block_tables=block_tables,
+        seq_lens=kv_lens_tensor,
+        max_seq_len=max_kv_len,
+        bmm1_scale=k_scale * scale,
+        bmm2_scale=v_scale,
+        out=output_trtllm,
+    )
+
+    torch.testing.assert_close(output, output_trtllm, atol=1e-2, rtol=1e-2), \
+        f"{torch.max(torch.abs(output - output_trtllm))}"
+
+
+@pytest.mark.parametrize("batch_size", BATCH_SIZES)
+@pytest.mark.parametrize("num_heads", NUM_HEADS)
+@pytest.mark.parametrize("head_size", HEAD_SIZES)
+@pytest.mark.parametrize("block_size", BLOCK_SIZES)
+@pytest.mark.parametrize("kv_layout", KV_LAYOUTS)
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("kv_cache_dtype", KV_CACHE_DTYPES)
+@pytest.mark.parametrize("soft_cap", [None])
+@torch.inference_mode
+def test_flashinfer_trtllm_prefill_with_baseline(
+    batch_size: int,
+    num_heads: tuple[int, int],
+    head_size: int,
+    block_size: int,
+    kv_layout: str,
+    dtype: torch.dtype,
+    kv_cache_dtype: Optional[torch.dtype],
+    soft_cap: Optional[float],
+) -> None:
+    kv_cache_dtype = dtype if kv_cache_dtype is None else kv_cache_dtype
+    if dtype != kv_cache_dtype:
+        pytest.skip(f"Not supported dtype({dtype}) with "
+                    "kv_cache_dtype({kv_cache_dtype})")
+
+    torch.set_default_device("cuda")
+    current_platform.seed_everything(0)
+
+    q_lens = torch.randint(1, MAX_Q_LEN, (batch_size, ), dtype=torch.int32)
+    q_lens[-1] = MAX_Q_LEN
+    max_q_len = torch.max(q_lens).item()
+    q_indptr = torch.cat([
+        torch.tensor([0], dtype=torch.int32),
+        torch.cumsum(q_lens, dim=0, dtype=torch.int32),
+    ])
+
+    kv_lens = torch.randint(0, MAX_KV_LEN, (batch_size, ), dtype=torch.int32)
+    kv_lens[-1] = MAX_KV_LEN
+
+    seq_lens = kv_lens + q_lens
+    max_seq_len = torch.max(seq_lens).item()
+    num_seqs = len(seq_lens)
+
+    num_query_heads = num_heads[0]
+    num_kv_heads = num_heads[1]
+    assert num_query_heads % num_kv_heads == 0
+
+    scale = head_size**-0.5
+
+    query = torch.randn(torch.sum(q_lens).item(),
+                        num_query_heads,
+                        head_size,
+                        dtype=dtype)
+
+    kv_cache_shape = None
+    if kv_layout == "NHD":
+        kv_cache_shape = (NUM_BLOCKS, 2, block_size, num_kv_heads, head_size)
+    elif kv_layout == "HND":
+        kv_cache_shape = (NUM_BLOCKS, 2, num_kv_heads, block_size, head_size)
+    else:
+        raise ValueError(f"Invalid kv_layout: {kv_layout}")
+    key_value_cache = torch.randn(kv_cache_shape, dtype=dtype)
+    kv_scale = 1.0
+    if kv_cache_dtype is current_platform.fp8_dtype():
+        key_value_cache, kv_scale = to_float8(key_value_cache,
+                                              current_platform.fp8_dtype())
+
+    max_num_blocks_per_seq = (max_seq_len + block_size - 1) // block_size
+    block_tables = torch.randint(0,
+                                 NUM_BLOCKS,
+                                 (num_seqs, max_num_blocks_per_seq),
+                                 dtype=torch.int32)
+    k_scale = v_scale = kv_scale
+    kv_indptr = [0]
+    kv_indices = []
+    kv_last_page_lens = []
+    for i in range(num_seqs):
+        seq_len = seq_lens[i]
+        assert seq_len > 0
+        num_blocks = (seq_len + block_size - 1) // block_size
+        kv_indices.extend(block_tables[i, :num_blocks])
+        kv_indptr.append(kv_indptr[-1] + num_blocks)
+        kv_last_page_len = seq_len % block_size
+        if kv_last_page_len == 0:
+            kv_last_page_len = block_size
+        kv_last_page_lens.append(kv_last_page_len)
+
+    kv_indptr = torch.tensor(kv_indptr, dtype=torch.int32)
+    kv_indices = torch.tensor(kv_indices, dtype=torch.int32)
+    kv_last_page_lens = torch.tensor(kv_last_page_lens, dtype=torch.int32)
+
+    workspace_buffer = torch.empty(128 * 1024 * 1024, dtype=torch.int8)
+    wrapper = flashinfer.BatchPrefillWithPagedKVCacheWrapper(
+        workspace_buffer, kv_layout)
+    wrapper.plan(q_indptr,
+                 kv_indptr,
+                 kv_indices,
+                 kv_last_page_lens,
+                 num_query_heads,
+                 num_kv_heads,
+                 head_size,
+                 block_size,
+                 causal=True,
+                 sm_scale=scale,
+                 q_data_type=dtype,
+                 kv_data_type=kv_cache_dtype,
+                 logits_soft_cap=soft_cap)
+
+    output = torch.empty(query.shape, dtype=dtype)
+    wrapper.run(query,
+                key_value_cache,
+                k_scale=k_scale,
+                v_scale=v_scale,
+                out=output)
+
+    # TRTLLM Prefill
+    output_trtllm = torch.empty(query.shape, dtype=dtype)
+    flashinfer.prefill.trtllm_batch_context_with_kv_cache(
+        query=query.contiguous(),
+        kv_cache=key_value_cache,
+        workspace_buffer=workspace_buffer,
+        block_tables=block_tables,
+        seq_lens=seq_lens,
+        max_q_len=max_q_len,
+        max_kv_len=max_seq_len,
+        bmm1_scale=k_scale * scale,
+        bmm2_scale=v_scale,
+        batch_size=num_seqs,
+        cum_seq_lens_q=q_indptr,
+        cum_seq_lens_kv=kv_indptr,
+        out=output_trtllm,
+    )
+
+    torch.testing.assert_close(output, output_trtllm, atol=1e-2, rtol=1e-2), \
+        f"{torch.max(torch.abs(output - output_trtllm))}"
diff --git a/tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py b/tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py
deleted file mode 100644
index 2e2130fab6a21..0000000000000
--- a/tests/kernels/attention/test_flashinfer_trtllm_decode_attention.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Optional
-
-import flashinfer
-import pytest
-import torch
-
-from vllm.platforms import current_platform
-
-if not current_platform.is_device_capability(100):
-    pytest.skip("This TRTLLM kernel requires NVIDIA Blackwell.",
-                allow_module_level=True)
-
-FLOAT32_BYTES = torch.finfo(torch.float).bits // 8
-
-# KV Cache Layout for TRT-LLM
-# kv_cache_shape = (num_blocks, 2, num_kv_heads, page_size, head_dim)
-
-NUM_HEADS = [(64, 8), (16, 16), (40, 8), (32, 8)]
-HEAD_SIZES = [128]
-BLOCK_SIZES = [16, 32]
-DTYPES = [torch.float16, torch.bfloat16]
-NUM_BLOCKS = 32768  # Large enough to test overflow in index calculation.
-SOFT_CAPS = [None, 30.0, 50.0]
-
-
-def to_float8(x, dtype=torch.float8_e4m3fn):
-    finfo = torch.finfo(dtype)
-    min_val, max_val = x.aminmax()
-    amax = torch.maximum(min_val.abs(), max_val.abs()).clamp(min=1e-12)
-    scale = finfo.max / amax * 0.1
-    x_scl_sat = (x * scale).clamp(min=finfo.min, max=finfo.max)
-    return x_scl_sat.to(dtype), scale.float().reciprocal()
-
-
-@pytest.mark.parametrize("kv_lens", [[1328, 18, 463], [1, 54, 293, 70]])
-@pytest.mark.parametrize("num_heads", NUM_HEADS)
-@pytest.mark.parametrize("head_size", HEAD_SIZES)
-@pytest.mark.parametrize("block_size", BLOCK_SIZES)
-@pytest.mark.parametrize("kv_layout", ["HND"])
-@pytest.mark.parametrize("dtype", DTYPES)
-@pytest.mark.parametrize("soft_cap", SOFT_CAPS)
-@torch.inference_mode
-def test_flashinfer_trtllm_decode_with_baseline(
-    kv_lens: list[int],
-    num_heads: tuple[int, int],
-    head_size: int,
-    dtype: torch.dtype,
-    block_size: int,
-    soft_cap: Optional[float],
-    kv_layout: str,
-) -> None:
-    torch.set_default_device("cuda")
-    current_platform.seed_everything(0)
-    num_seqs = len(kv_lens)
-    num_query_heads = num_heads[0]
-    num_kv_heads = num_heads[1]
-
-    assert num_query_heads % num_kv_heads == 0
-    max_kv_len = max(kv_lens)
-    scale = head_size**-0.5
-
-    query = torch.randn(num_seqs, num_query_heads, head_size, dtype=dtype)
-    kv_cache_shape = None
-    if kv_layout == "NHD":
-        kv_cache_shape = (NUM_BLOCKS, 2, block_size, num_kv_heads, head_size)
-    elif kv_layout == "HND":
-        kv_cache_shape = (NUM_BLOCKS, 2, num_kv_heads, block_size, head_size)
-    else:
-        raise ValueError(f"Invalid kv_layout: {kv_layout}")
-    key_value_cache = torch.randn(kv_cache_shape, dtype=dtype)
-
-    max_num_blocks_per_seq = (max_kv_len + block_size - 1) // block_size
-    block_tables = torch.randint(0,
-                                 NUM_BLOCKS,
-                                 (num_seqs, max_num_blocks_per_seq),
-                                 dtype=torch.int32)
-    k_scale = v_scale = 1.0
-    kv_indptr = [0]
-    kv_indices = []
-    kv_last_page_lens = []
-    for i in range(num_seqs):
-        seq_len = kv_lens[i]
-        assert seq_len > 0
-        num_blocks = (seq_len + block_size - 1) // block_size
-        kv_indices.extend(block_tables[i, :num_blocks])
-        kv_indptr.append(kv_indptr[-1] + num_blocks)
-        kv_last_page_len = seq_len % block_size
-        if kv_last_page_len == 0:
-            kv_last_page_len = block_size
-        kv_last_page_lens.append(kv_last_page_len)
-
-    kv_indptr = torch.tensor(kv_indptr, dtype=torch.int32)
-    kv_indices = torch.tensor(kv_indices, dtype=torch.int32)
-    kv_last_page_lens = torch.tensor(kv_last_page_lens, dtype=torch.int32)
-
-    workspace_buffer = torch.empty(128 * 1024 * 1024, dtype=torch.int8)
-    wrapper = flashinfer.\
-        BatchDecodeWithPagedKVCacheWrapper(workspace_buffer, kv_layout,
-                use_tensor_cores=(
-                    (num_query_heads//num_kv_heads) > 4)
-                )
-    wrapper.plan(kv_indptr,
-                 kv_indices,
-                 kv_last_page_lens,
-                 num_query_heads,
-                 num_kv_heads,
-                 head_size,
-                 block_size,
-                 "NONE",
-                 q_data_type=dtype,
-                 kv_data_type=dtype,
-                 logits_soft_cap=soft_cap)
-
-    output = torch.empty(query.shape, dtype=dtype)
-    wrapper.run(query, key_value_cache, scale, out=output)
-
-    # TRTLLM Decode
-    max_kv_len = max(kv_lens)
-    kv_lens_tensor = torch.tensor(kv_lens,
-                                  dtype=torch.int,
-                                  device=query.device)
-    output_trtllm = torch.empty(query.shape, dtype=dtype)
-    flashinfer.decode.trtllm_batch_decode_with_kv_cache(
-        query.contiguous(),
-        key_value_cache,
-        workspace_buffer,
-        block_tables,
-        kv_lens_tensor,
-        max_kv_len,
-        bmm1_scale=k_scale * scale,
-        bmm2_scale=v_scale,
-        out=output_trtllm,
-    )
-
-    torch.testing.assert_close(output, output_trtllm, atol=1e-2, rtol=1e-2), \
-        f"{torch.max(torch.abs(output - output_trtllm))}"
diff --git a/vllm/attention/backends/flashinfer.py b/vllm/attention/backends/flashinfer.py
index b3372ce2eca8c..78d8a67e37f8f 100644
--- a/vllm/attention/backends/flashinfer.py
+++ b/vllm/attention/backends/flashinfer.py
@@ -46,7 +46,7 @@ from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
 from vllm.utils import (async_tensor_h2d, get_kv_cache_torch_dtype,
                         make_tensor_with_pad)
-from vllm.utils.flashinfer import use_trtllm_decode_attention
+from vllm.utils.flashinfer import use_trtllm_attention
 
 logger = init_logger(__name__)
 
@@ -1114,7 +1114,7 @@ class FlashInferImpl(AttentionImpl):
             assert decode_meta.decode_wrapper._sm_scale == softmax_scale
             # TODO: @pavanimajety Remove this once the switch happens
             # inside flashinfer.
-            if not use_trtllm_decode_attention(
+            if not use_trtllm_attention(
                     num_decode_tokens, attn_metadata.max_decode_seq_len,
                     kv_cache_dtype, attn_metadata.num_qo_heads,
                     attn_metadata.num_kv_heads, attn_metadata.head_dim):
diff --git a/vllm/envs.py b/vllm/envs.py
index 78f955f78a987..9bce5c6d2e0bb 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -1027,9 +1027,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_USE_CUDNN_PREFILL":
     lambda: bool(int(os.getenv("VLLM_USE_CUDNN_PREFILL", "0"))),
 
-    # If set to 1, use the TRTLLM Decode Attention backend in flashinfer.
-    "VLLM_USE_TRTLLM_DECODE_ATTENTION":
-    lambda: os.getenv("VLLM_USE_TRTLLM_DECODE_ATTENTION", None),
+    # If set to 1, use the TRTLLM Attention backend in flashinfer.
+    "VLLM_USE_TRTLLM_ATTENTION":
+    lambda: os.getenv("VLLM_USE_TRTLLM_ATTENTION", None),
 
     # Controls garbage collection during CUDA graph capture.
     # If set to 0 (default), enables GC freezing to speed up capture time.
diff --git a/vllm/utils/flashinfer.py b/vllm/utils/flashinfer.py
index 29967bc516715..cce1aefaf9b02 100644
--- a/vllm/utils/flashinfer.py
+++ b/vllm/utils/flashinfer.py
@@ -124,7 +124,7 @@ def has_flashinfer_cutlass_fused_moe() -> bool:
 @functools.cache
 def has_nvidia_artifactory() -> bool:
     """Return ``True`` if NVIDIA's artifactory is accessible.
-    
+
     This checks connectivity to the kernel inference library artifactory
     which is required for downloading certain cubin kernels like TRTLLM FHMA.
     """
@@ -144,7 +144,7 @@ def has_nvidia_artifactory() -> bool:
         return False
 
 
-def use_trtllm_decode_attention(
+def use_trtllm_attention(
     num_tokens: int,
     max_seq_len: int,
     kv_cache_dtype: str,
@@ -159,29 +159,26 @@ def use_trtllm_decode_attention(
 
     # Check if the dimensions are supported by TRTLLM decode attention
     if (attn_head_size is None or num_qo_heads is None or num_kv_heads is None
-            or num_qo_heads // num_kv_heads > 8
             or num_qo_heads % num_kv_heads != 0 or attn_head_size != 128):
         return False
 
-    env_value = envs.VLLM_USE_TRTLLM_DECODE_ATTENTION
+    env_value = envs.VLLM_USE_TRTLLM_ATTENTION
     if env_value is not None:
-        logger.info_once("VLLM_USE_TRTLLM_DECODE_ATTENTION is set to %s",
-                         env_value)
+        logger.info_once("VLLM_USE_TRTLLM_ATTENTION is set to %s", env_value)
         # Environment variable is set - respect it
         # Making the conditional check for zero because
         # the path is automatically enabled if the batch size condition
         # is satisfied.
         no_use_trtllm = (env_value == "0")
         if not no_use_trtllm:
-            logger.info_once("Using TRTLLM decode attention.")
+            logger.info_once("Using TRTLLM attention.")
         return not no_use_trtllm
     else:
         # Environment variable not set - use auto-detection
         use_trtllm = (num_tokens <= 256 and max_seq_len < 131072
                       and kv_cache_dtype == "auto")
         if use_trtllm:
-            logger.warning_once(
-                "Using TRTLLM decode attention (auto-detected).")
+            logger.warning_once("Using TRTLLM attention (auto-detected).")
         return use_trtllm
 
 
@@ -195,5 +192,5 @@ __all__ = [
     "has_flashinfer_moe",
     "has_flashinfer_cutlass_fused_moe",
     "has_nvidia_artifactory",
-    "use_trtllm_decode_attention",
+    "use_trtllm_attention",
 ]
diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 3697cb9387a92..8592d1b26dfa8 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -12,6 +12,7 @@ from flashinfer import (BatchDecodeWithPagedKVCacheWrapper,
                         MultiLevelCascadeAttentionWrapper)
 from flashinfer.decode import (_get_range_buf, get_seq_lens,
                                trtllm_batch_decode_with_kv_cache)
+from flashinfer.prefill import trtllm_batch_context_with_kv_cache
 
 import vllm.envs as envs
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
@@ -19,7 +20,7 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.utils import cdiv, is_pin_memory_available
-from vllm.utils.flashinfer import use_trtllm_decode_attention
+from vllm.utils.flashinfer import use_trtllm_attention
 from vllm.v1.attention.backends.flash_attn import use_cascade_attention
 # yapf conflicts with isort for this block
 # yapf: disable
@@ -149,9 +150,12 @@ class FlashInferMetadata:
     slot_mapping: torch.Tensor
 
     # For flashinfer trtllm batch decode
+    max_q_len: int
     max_seq_len: int
     seq_lens: torch.Tensor
     block_table_tensor: torch.Tensor
+    prefill_use_trtllm: bool
+    decode_use_trtllm: bool
 
     # For handling prefill decode split
     num_decodes: int
@@ -170,6 +174,9 @@ class FlashInferMetadata:
     decode_wrapper: Optional[BatchDecodeWithPagedKVCacheWrapper] = None
     cascade_wrapper: Optional[MultiLevelCascadeAttentionWrapper] = None
 
+    qo_indptr_gpu: Optional[torch.Tensor] = None
+    paged_kv_indptr_gpu: Optional[torch.Tensor] = None
+
     def __post_init__(self):
         if self.head_dim is not None:
             FlashInferBackend.validate_head_size(self.head_dim)
@@ -305,8 +312,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
                 2, self._get_workspace_buffer(), get_kv_cache_layout())
         return self._cascade_wrapper
 
-    def _plan(self, num_prefills: int, num_decodes: int,
-              attn_metadata: FlashInferMetadata):
+    def _plan(self, attn_metadata: FlashInferMetadata):
         if attn_metadata.use_cascade:
             attn_metadata.cascade_wrapper = self._get_cascade_wrapper()
             attn_metadata.cascade_wrapper.plan(
@@ -341,6 +347,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
             # Regular attention (common case).
             # Decodes are at the front and prefills are at the back,
             # according to reorder_batch()
+            num_prefills = attn_metadata.num_prefills
+            num_decodes = attn_metadata.num_decodes
             if num_prefills > 0:
                 # Decodes are first so prefills start after the last decode
                 prefill_start = num_decodes
@@ -356,23 +364,31 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
                 # to be relative to the start of the prefill queries.
                 qo_indptr_cpu = attn_metadata.qo_indptr_cpu[
                     prefill_start:] - attn_metadata.qo_indptr_cpu[prefill_start]
-                attn_metadata.prefill_wrapper.plan(
-                    qo_indptr_cpu,
-                    attn_metadata.paged_kv_indptr_cpu[prefill_start:],
-                    attn_metadata.paged_kv_indices,
-                    attn_metadata.paged_kv_last_page_len_cpu[prefill_start:],
-                    attn_metadata.num_qo_heads,
-                    attn_metadata.num_kv_heads,
-                    attn_metadata.head_dim,
-                    attn_metadata.page_size,
-                    causal=True,
-                    sm_scale=self.global_hyperparameters.sm_scale,
-                    window_left=self.global_hyperparameters.window_left,
-                    logits_soft_cap=self.global_hyperparameters.
-                    logits_soft_cap,
-                    q_data_type=attn_metadata.q_data_type,
-                    kv_data_type=attn_metadata.kv_data_type,
-                )
+                paged_kv_indptr_cpu = attn_metadata.paged_kv_indptr_cpu[
+                    prefill_start:]
+                if not attn_metadata.prefill_use_trtllm:
+                    attn_metadata.prefill_wrapper.plan(
+                        qo_indptr_cpu,
+                        paged_kv_indptr_cpu,
+                        attn_metadata.paged_kv_indices,
+                        attn_metadata.
+                        paged_kv_last_page_len_cpu[prefill_start:],
+                        attn_metadata.num_qo_heads,
+                        attn_metadata.num_kv_heads,
+                        attn_metadata.head_dim,
+                        attn_metadata.page_size,
+                        causal=True,
+                        sm_scale=self.global_hyperparameters.sm_scale,
+                        window_left=self.global_hyperparameters.window_left,
+                        logits_soft_cap=self.global_hyperparameters.
+                        logits_soft_cap,
+                        q_data_type=attn_metadata.q_data_type,
+                        kv_data_type=attn_metadata.kv_data_type,
+                    )
+                else:
+                    attn_metadata.qo_indptr_gpu = qo_indptr_cpu.to(self.device)
+                    attn_metadata.paged_kv_indptr_gpu = paged_kv_indptr_cpu.to(
+                        self.device)
 
             if num_decodes > 0:
                 pure_decode = num_prefills == 0
@@ -400,11 +416,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
 
                 attn_metadata.decode_wrapper = self._get_decode_wrapper(
                     num_input_tokens, use_cudagraph)
-                if not use_trtllm_decode_attention(
-                        num_decodes, attn_metadata.max_seq_len,
-                        self.cache_config.cache_dtype,
-                        attn_metadata.num_qo_heads, attn_metadata.num_kv_heads,
-                        attn_metadata.head_dim):
+                if not attn_metadata.decode_use_trtllm:
                     # Use the persistent buffer with padding length,
                     # instead of the same address but chunked version
                     # in atten_metadata when using cudagraph.
@@ -437,6 +449,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
             split_decodes_and_prefills(common_attn_metadata)
 
         page_size = self.kv_cache_spec.block_size
+        max_q_len = common_attn_metadata.max_query_len
         max_seq_len = common_attn_metadata.seq_lens_cpu.max()
         seq_lens = common_attn_metadata.seq_lens
         seq_lens_cpu = common_attn_metadata.seq_lens_cpu
@@ -503,6 +516,24 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
                 cache_dtype)
         else:
             kv_cache_dtype = self.kv_cache_spec.dtype
+
+        num_qo_heads = self.vllm_config.model_config.get_num_attention_heads(
+            self.vllm_config.parallel_config)
+        num_kv_heads = self.kv_cache_spec.num_kv_heads
+        head_dim = self.kv_cache_spec.head_size
+
+        # TRT-LLM prefill attention does not yet support an fp8 KV cache.
+        # TRT-LLM may not support sliding window, so require window_left == -1.
+        prefill_use_trtllm = (self.global_hyperparameters.window_left == -1
+                              and not cache_dtype.startswith("fp8")
+                              and use_trtllm_attention(
+                                num_prefill_tokens, max_seq_len, cache_dtype,
+                                num_qo_heads, num_kv_heads, head_dim))
+        decode_use_trtllm = (self.global_hyperparameters.window_left == -1
+                             and use_trtllm_attention(
+                                num_decode_tokens, max_seq_len, cache_dtype,
+                                num_qo_heads, num_kv_heads, head_dim))
+
         attn_metadata = FlashInferMetadata(
             num_actual_tokens=num_actual_tokens,
             qo_indptr_cpu=common_attn_metadata.query_start_loc_cpu,
@@ -510,14 +541,19 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
             paged_kv_indices=paged_kv_indices,
             paged_kv_last_page_len_cpu=self.
             paged_kv_last_page_len_cpu[:num_reqs],
-            num_qo_heads=self.vllm_config.model_config.get_num_attention_heads(
-                self.vllm_config.parallel_config),
-            num_kv_heads=self.kv_cache_spec.num_kv_heads,
-            head_dim=self.kv_cache_spec.head_size,
+            num_qo_heads=num_qo_heads,
+            num_kv_heads=num_kv_heads,
+            head_dim=head_dim,
             page_size=page_size,
             kv_data_type=kv_cache_dtype,
             q_data_type=self.vllm_config.model_config.dtype,
             slot_mapping=common_attn_metadata.slot_mapping,
+            max_q_len=max_q_len,
+            max_seq_len=max_seq_len,
+            seq_lens=seq_lens,
+            block_table_tensor=block_table_tensor,
+            prefill_use_trtllm=prefill_use_trtllm,
+            decode_use_trtllm=decode_use_trtllm,
             num_decodes=num_decodes,
             num_decode_tokens=num_decode_tokens,
             num_prefills=num_prefills,
@@ -527,12 +563,9 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
             shared_kv_page_indptr_cpu=shared_kv_page_indptr_cpu,
             shared_kv_page_indices_cpu=shared_kv_page_indices_cpu,
             shared_kv_last_page_len_cpu=shared_kv_last_page_len_cpu,
-            max_seq_len=max_seq_len,
-            seq_lens=seq_lens,
-            block_table_tensor=block_table_tensor,
         )
 
-        self._plan(num_prefills, num_decodes, attn_metadata)
+        self._plan(attn_metadata)
 
         return attn_metadata
 
@@ -698,30 +731,64 @@ class FlashInferImpl(AttentionImpl):
         # Regular attention (common case).
         # Decodes are at the front and prefills are at the back,
         # according to reorder_batch()
-        if prefill_wrapper := attn_metadata.prefill_wrapper:
+        if num_prefill_tokens > 0:
+            prefill_wrapper = attn_metadata.prefill_wrapper
             prefill_query = query[num_decode_tokens:]
             assert prefill_query.shape[0] == num_prefill_tokens
             assert prefill_wrapper is not None
-            assert prefill_wrapper._causal
-            assert prefill_wrapper._window_left == window_left
-            assert prefill_wrapper._logits_soft_cap == (self.logits_soft_cap
-                                                        or 0.0)
-            assert prefill_wrapper._sm_scale == self.scale
-            prefill_wrapper.run(
-                prefill_query,
-                kv_cache_permute,
-                k_scale=layer._k_scale_float,
-                v_scale=layer._v_scale_float,
-                out=output[num_decode_tokens:],
-            )
-        if decode_wrapper := attn_metadata.decode_wrapper:
+
+            if not attn_metadata.prefill_use_trtllm:
+                assert prefill_wrapper._causal
+                assert prefill_wrapper._window_left == window_left
+                assert prefill_wrapper._logits_soft_cap == (
+                    self.logits_soft_cap or 0.0)
+                assert prefill_wrapper._sm_scale == self.scale
+                prefill_wrapper.run(
+                    prefill_query,
+                    kv_cache_permute,
+                    k_scale=layer._k_scale_float,
+                    v_scale=layer._v_scale_float,
+                    out=output[num_decode_tokens:],
+                )
+            else:
+                # prefill_query may be non-contiguous
+                prefill_query = prefill_query.contiguous()
+                workspace_buffer = prefill_wrapper._float_workspace_buffer
+                block_tables_prefill = attn_metadata.block_table_tensor[
+                    num_decode_tokens:]
+                seq_lens_prefill = attn_metadata.seq_lens[num_decode_tokens:]
+
+                # This path needs to be enabled with VLLM_KV_CACHE_LAYOUT = HND
+                assert get_kv_cache_layout() == "HND"
+                assert prefill_query.is_contiguous()
+                assert kv_cache_permute.is_contiguous()
+                assert workspace_buffer.is_contiguous()
+                assert block_tables_prefill.is_contiguous()
+                assert seq_lens_prefill.is_contiguous()
+
+                trtllm_batch_context_with_kv_cache(
+                    query=prefill_query,
+                    kv_cache=kv_cache_permute,
+                    workspace_buffer=workspace_buffer,
+                    block_tables=block_tables_prefill,
+                    seq_lens=seq_lens_prefill,
+                    max_q_len=attn_metadata.max_q_len,
+                    max_kv_len=attn_metadata.max_seq_len,
+                    bmm1_scale=layer._k_scale_float * self.scale,
+                    bmm2_scale=layer._v_scale_float,
+                    batch_size=attn_metadata.num_prefills,
+                    cum_seq_lens_q=attn_metadata.qo_indptr_gpu,
+                    cum_seq_lens_kv=attn_metadata.paged_kv_indptr_gpu,
+                    out=output[num_decode_tokens:],
+                )
+
+        if num_decode_tokens > 0:
+            decode_wrapper = attn_metadata.decode_wrapper
             decode_query = query[:num_decode_tokens]
             assert decode_query.shape[0] == num_decode_tokens
             assert decode_wrapper is not None
-            if not use_trtllm_decode_attention(
-                    attn_metadata.num_decodes, attn_metadata.max_seq_len,
-                    self.kv_cache_dtype, attn_metadata.num_qo_heads,
-                    attn_metadata.num_kv_heads, attn_metadata.head_dim):
+
+            if not attn_metadata.decode_use_trtllm:
                 assert decode_wrapper._window_left == window_left
                 assert decode_wrapper._logits_soft_cap == (self.logits_soft_cap
                                                            or 0.0)
@@ -734,34 +801,32 @@ class FlashInferImpl(AttentionImpl):
                     out=output[:num_decode_tokens],
                 )
             else:
+                # decode_query may be non-contiguous
+                decode_query = decode_query.contiguous()
+                workspace_buffer = decode_wrapper._float_workspace_buffer
+                block_tables_decode = attn_metadata.block_table_tensor[:
+                                                                       num_decode_tokens]
+                seq_lens_decode = attn_metadata.seq_lens[:num_decode_tokens]
+
                 # This path needs to be enabled with VLLM_KV_CACHE_LAYOUT = HND
-                if num_decode_tokens > 0:
-                    # decode_query may be non-contiguous
-                    decode_query = decode_query.contiguous()
-                    block_tables_decode = attn_metadata.block_table_tensor[:
-                                                                           num_decode_tokens]
-                    seq_lens_decode = attn_metadata.seq_lens[:
-                                                             num_decode_tokens]
-                    workspace_buffer = decode_wrapper._float_workspace_buffer
+                assert get_kv_cache_layout() == "HND"
+                assert decode_query.is_contiguous()
+                assert kv_cache_permute.is_contiguous()
+                assert workspace_buffer.is_contiguous()
+                assert block_tables_decode.is_contiguous()
+                assert seq_lens_decode.is_contiguous()
 
-                    assert get_kv_cache_layout() == "HND"
-                    assert decode_query.is_contiguous()
-                    assert kv_cache_permute.is_contiguous()
-                    assert block_tables_decode.is_contiguous()
-                    assert seq_lens_decode.is_contiguous()
-                    assert workspace_buffer.is_contiguous()
-
-                    trtllm_batch_decode_with_kv_cache(
-                        query=decode_query,
-                        kv_cache=kv_cache_permute,
-                        workspace_buffer=workspace_buffer,
-                        block_tables=block_tables_decode,
-                        seq_lens=seq_lens_decode,
-                        max_seq_len=attn_metadata.max_seq_len,
-                        bmm1_scale=layer._k_scale_float * self.scale,
-                        bmm2_scale=layer._v_scale_float,
-                        out=output[:num_decode_tokens],
-                    )
+                trtllm_batch_decode_with_kv_cache(
+                    query=decode_query,
+                    kv_cache=kv_cache_permute,
+                    workspace_buffer=workspace_buffer,
+                    block_tables=block_tables_decode,
+                    seq_lens=seq_lens_decode,
+                    max_seq_len=attn_metadata.max_seq_len,
+                    bmm1_scale=layer._k_scale_float * self.scale,
+                    bmm2_scale=layer._v_scale_float,
+                    out=output[:num_decode_tokens],
+                )
         return output_padded
 
 
@@ -786,8 +851,8 @@ def fast_plan_decode(
     non_blocking: bool = True,
 ) -> None:
     """
-    A faster version of BatchDecodeWithPagedKVCacheWrapper::plan used for 
-    cudagraph capture/replay, while the no cudagraph version turns back 
+    A faster version of BatchDecodeWithPagedKVCacheWrapper::plan used for
+    cudagraph capture/replay, while the no cudagraph version turns back
     to the original plan.
     using original plan after passing host-side buffers:
     - only host-to-device copy of indptr and last_page_len buffers

From 74333ae2f6c3c4aa4b55301e5ed7aba03a5b09f8 Mon Sep 17 00:00:00 2001
From: Ning Xie <andy.xning@gmail.com>
Date: Tue, 5 Aug 2025 18:17:46 +0800
Subject: [PATCH 220/224] [Misc] correct static type check for GroupCoordinator
 (#21946)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
---
 .../device_communicators/ray_communicator.py  |  1 +
 vllm/distributed/eplb/eplb_state.py           |  3 ++
 vllm/distributed/parallel_state.py            | 29 ++++++++++++++++---
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/vllm/distributed/device_communicators/ray_communicator.py b/vllm/distributed/device_communicators/ray_communicator.py
index e5ba297ebcc1b..46cc1c2f52d67 100644
--- a/vllm/distributed/device_communicators/ray_communicator.py
+++ b/vllm/distributed/device_communicators/ray_communicator.py
@@ -70,6 +70,7 @@ class RayPPCommunicator(Communicator):
             assert ray.get_gpu_ids(), "RayPPCommunicator has no GPUs assigned"
 
             self._comm = get_pp_group().device_communicator
+            assert self._comm is not None
 
             # Since we wrap around the vLLM _PP communicator, we use
             # the rank from the vLLM communicator, and ignore the rank
diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py
index af64620849688..f64b516b0d042 100644
--- a/vllm/distributed/eplb/eplb_state.py
+++ b/vllm/distributed/eplb/eplb_state.py
@@ -251,6 +251,7 @@ class EplbState:
 
         if global_expert_load is not None:
             ep_group = get_ep_group().device_group
+            assert ep_group is not None
             assert global_expert_load.shape == (model.num_moe_layers,
                                                 model.num_logical_experts)
             assert global_expert_load.dtype == torch.int64
@@ -357,6 +358,7 @@ class EplbState:
 
             # Collect load metrics from all ranks
             ep_group = get_ep_group().device_group
+            assert ep_group is not None
             num_tokens_list = [
                 torch.empty_like(num_tokens) for _ in range(ep_group.size())
             ]
@@ -412,6 +414,7 @@ class EplbState:
         """
 
         ep_group = get_ep_group().device_group
+        assert ep_group is not None
         ep_rank = ep_group.rank()
 
         time_start = None
diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 470c1355d2a91..6c25cdcfb7b8c 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -196,10 +196,11 @@ class GroupCoordinator:
     #   3     |   1  |  3   |     1      |       3
     local_rank: int  # local rank used to assign devices
     rank_in_group: int  # rank inside the group
-    cpu_group: ProcessGroup  # group for CPU communication
-    device_group: ProcessGroup  # group for device communication
+    cpu_group: Optional[ProcessGroup]  # group for CPU communication
+    device_group: Optional[ProcessGroup]  # group for device communication
     use_device_communicator: bool  # whether to use device communicator
-    device_communicator: DeviceCommunicatorBase  # device communicator
+    device_communicator: Optional[
+        DeviceCommunicatorBase]  # device communicator
     mq_broadcaster: Optional[Any]  # shared memory broadcaster
 
     def __init__(
@@ -250,7 +251,7 @@ class GroupCoordinator:
 
         self.use_device_communicator = use_device_communicator
 
-        self.device_communicator: DeviceCommunicatorBase = None  # type: ignore
+        self.device_communicator = None
         if use_device_communicator and self.world_size > 1:
             device_comm_cls = resolve_obj_by_qualname(
                 current_platform.get_device_communicator_cls())
@@ -364,6 +365,8 @@ class GroupCoordinator:
             return self._all_reduce_out_place(input_)
 
     def _all_reduce_out_place(self, input_: torch.Tensor) -> torch.Tensor:
+        if self.device_communicator is None:
+            raise ValueError("No device communicator found")
         return self.device_communicator.all_reduce(input_)
 
     def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Tensor:
@@ -384,12 +387,16 @@ class GroupCoordinator:
 
     def _all_gather_out_place(self, input_: torch.Tensor,
                               dim: int) -> torch.Tensor:
+        if self.device_communicator is None:
+            raise ValueError("No device communicator found")
         return self.device_communicator.all_gather(input_, dim)
 
     def all_gatherv(self,
                     input_: Union[torch.Tensor, list[torch.Tensor]],
                     dim: int = 0,
                     sizes: Optional[list[int]] = None):
+        if self.device_communicator is None:
+            raise ValueError("No device communicator found")
         return self.device_communicator.all_gatherv(input_, dim, sizes)
 
     def reduce_scatter(self,
@@ -414,10 +421,14 @@ class GroupCoordinator:
                         input_: torch.Tensor,
                         dim: int = -1,
                         sizes: Optional[list[int]] = None) -> torch.Tensor:
+        if self.device_communicator is None:
+            raise ValueError("No device communicator found")
         return self.device_communicator.reduce_scatterv(input_, dim, sizes)
 
     def _reduce_scatter_out_place(self, input_: torch.Tensor,
                                   dim: int) -> torch.Tensor:
+        if self.device_communicator is None:
+            raise ValueError("No device communicator found")
         return self.device_communicator.reduce_scatter(input_, dim)
 
     def gather(self,
@@ -433,6 +444,8 @@ class GroupCoordinator:
         # Bypass the function if we are using only 1 GPU.
         if world_size == 1:
             return input_
+        if self.device_communicator is None:
+            raise ValueError("No device communicator found")
         return self.device_communicator.gather(input_, dst, dim)
 
     def broadcast(self, input_: torch.Tensor, src: int = 0):
@@ -667,6 +680,8 @@ class GroupCoordinator:
         assert dst < self.world_size, f"Invalid dst rank ({dst})"
 
         if self.use_cpu_custom_send_recv:
+            if self.device_communicator is None:
+                raise ValueError("No device communicator found")
             self.device_communicator.send_tensor_dict(  # type: ignore
                 tensor_dict, dst)
             return None
@@ -727,6 +742,8 @@ class GroupCoordinator:
         assert src < self.world_size, f"Invalid src rank ({src})"
 
         if self.use_cpu_custom_send_recv:
+            if self.device_communicator is None:
+                raise ValueError("No device communicator found")
             return self.device_communicator.recv_tensor_dict(  # type: ignore
                 src)
 
@@ -784,6 +801,8 @@ class GroupCoordinator:
     def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None:
         """Sends a tensor to the destination rank in a blocking way"""
         """NOTE: `dst` is the local rank of the destination rank."""
+        if self.device_communicator is None:
+            raise ValueError("No device communicator found")
         self.device_communicator.send(tensor, dst)
 
     def recv(self,
@@ -792,6 +811,8 @@ class GroupCoordinator:
              src: Optional[int] = None) -> torch.Tensor:
         """Receives a tensor from the source rank."""
         """NOTE: `src` is the local rank of the source rank."""
+        if self.device_communicator is None:
+            raise ValueError("No device communicator found")
         return self.device_communicator.recv(size, dtype, src)
 
     def destroy(self):

From 0c275ad5ad1af35636581bffaafc9e694a270378 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= <nlucches@redhat.com>
Date: Tue, 5 Aug 2025 15:53:23 +0200
Subject: [PATCH 221/224] [V0 Deprecation][TPU] Remove V1 flag check from tests
 (#22248)

Signed-off-by: NickLucche <nlucches@redhat.com>
---
 tests/v1/tpu/test_mha_attn.py   | 7 -------
 tests/v1/tpu/test_multimodal.py | 7 -------
 tests/v1/tpu/test_sampler.py    | 8 +-------
 3 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/tests/v1/tpu/test_mha_attn.py b/tests/v1/tpu/test_mha_attn.py
index 55fee4ee1ad43..9d690851b70eb 100644
--- a/tests/v1/tpu/test_mha_attn.py
+++ b/tests/v1/tpu/test_mha_attn.py
@@ -12,17 +12,10 @@ import torch_xla
 import torch_xla.core
 import torch_xla.core.xla_model
 
-from vllm import envs
 from vllm.attention.layer import MultiHeadAttention
 from vllm.attention.selector import _cached_get_attn_backend
 from vllm.platforms import current_platform
 
-if not envs.VLLM_USE_V1:
-    pytest.skip(
-        "Skipping V1 tests. Rerun with `VLLM_USE_V1=1` to test.",
-        allow_module_level=True,
-    )
-
 
 @pytest.fixture(autouse=True)
 def clear_cache():
diff --git a/tests/v1/tpu/test_multimodal.py b/tests/v1/tpu/test_multimodal.py
index a61773a4f611b..bcc2993028dd6 100644
--- a/tests/v1/tpu/test_multimodal.py
+++ b/tests/v1/tpu/test_multimodal.py
@@ -4,19 +4,12 @@
 import openai
 import pytest
 
-from vllm import envs
 from vllm.multimodal.utils import encode_image_base64, fetch_image
 from vllm.platforms import current_platform
 
 from ...entrypoints.openai.test_vision import TEST_IMAGE_URLS
 from ...utils import RemoteOpenAIServer
 
-if not envs.VLLM_USE_V1:
-    pytest.skip(
-        "Skipping V1 tests. Rerun with `VLLM_USE_V1=1` to test.",
-        allow_module_level=True,
-    )
-
 
 @pytest.fixture(scope="session")
 def base64_encoded_image() -> dict[str, str]:
diff --git a/tests/v1/tpu/test_sampler.py b/tests/v1/tpu/test_sampler.py
index 198bb1e16ed9f..fa950e5f7f85b 100644
--- a/tests/v1/tpu/test_sampler.py
+++ b/tests/v1/tpu/test_sampler.py
@@ -4,16 +4,10 @@ import random
 
 import pytest
 
-from vllm import LLM, envs
+from vllm import LLM
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
 
-if not envs.VLLM_USE_V1:
-    pytest.skip(
-        "Skipping V1 tests. Rerun with `VLLM_USE_V1=1` to test.",
-        allow_module_level=True,
-    )
-
 
 @pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"])
 @pytest.mark.skipif(not current_platform.is_tpu(),

From c494f96fbcf5e9f19f59e3dea6c2780aeb6c567f Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 5 Aug 2025 09:57:10 -0400
Subject: [PATCH 222/224] Use UV_LINK_MODE=copy in Dockerfile to avoid hardlink
 fail (#22128)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 docker/Dockerfile | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 0d6afca74e867..c529d22e63191 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -119,6 +119,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
+# Use copy mode to avoid hardlink failures with Docker cache mounts
+ENV UV_LINK_MODE=copy
 
 # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
 # as it was causing spam when compiling the CUTLASS kernels
@@ -181,6 +183,8 @@ COPY requirements/build.txt requirements/build.txt
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
+# Use copy mode to avoid hardlink failures with Docker cache mounts
+ENV UV_LINK_MODE=copy
 
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -r requirements/build.txt \
@@ -272,6 +276,8 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
+# Use copy mode to avoid hardlink failures with Docker cache mounts
+ENV UV_LINK_MODE=copy
 
 COPY requirements/lint.txt requirements/lint.txt
 COPY requirements/test.txt requirements/test.txt
@@ -341,6 +347,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
+# Use copy mode to avoid hardlink failures with Docker cache mounts
+ENV UV_LINK_MODE=copy
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
@@ -472,6 +480,8 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
+# Use copy mode to avoid hardlink failures with Docker cache mounts
+ENV UV_LINK_MODE=copy
 
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \

From a7cb6101ca7bd3d3ee94a5fe37caab8ebca32d80 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 5 Aug 2025 12:39:38 -0400
Subject: [PATCH 223/224] [CI/Build] Update flashinfer to 0.2.9 (#22233)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 docker/Dockerfile | 2 +-
 setup.py          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index c529d22e63191..d444087a3eff7 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -392,7 +392,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
 # Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
 # We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel.
-ARG FLASHINFER_GIT_REF="v0.2.9rc2"
+ARG FLASHINFER_GIT_REF="v0.2.9"
 RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
   . /etc/environment
     git clone --depth 1 --recursive --shallow-submodules \
diff --git a/setup.py b/setup.py
index 64cfbb8db962b..c6f4985c5930e 100644
--- a/setup.py
+++ b/setup.py
@@ -665,7 +665,7 @@ setup(
                   "mistral_common[audio]"],  # Required for audio processing
         "video": [],  # Kept for backwards compatibility
         # FlashInfer should be updated together with the Dockerfile
-        "flashinfer": ["flashinfer-python==0.2.9rc2"],
+        "flashinfer": ["flashinfer-python==0.2.9"],
     },
     cmdclass=cmdclass,
     package_data=package_data,

From ae87ddd040b793fd9f4f05cb660a4728c81d7670 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Tue, 5 Aug 2025 12:40:23 -0400
Subject: [PATCH 224/224] [Refactor] Remove Unused Environment Variable
 `VLLM_NO_DEPRECATION_WARNING` (#22199)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 vllm/envs.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index 9bce5c6d2e0bb..e28e9658e5b53 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -70,7 +70,6 @@ if TYPE_CHECKING:
     NVCC_THREADS: Optional[str] = None
     VLLM_USE_PRECOMPILED: bool = False
     VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False
-    VLLM_NO_DEPRECATION_WARNING: bool = False
     VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
     CMAKE_BUILD_TYPE: Optional[str] = None
     VERBOSE: bool = False
@@ -582,10 +581,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
     lambda: bool(
         int(os.getenv("VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING", "1"))),
 
-    # If set, vllm will skip the deprecation warnings.
-    "VLLM_NO_DEPRECATION_WARNING":
-    lambda: bool(int(os.getenv("VLLM_NO_DEPRECATION_WARNING", "0"))),
-
     # If set, the OpenAI API server will stay alive even after the underlying
     # AsyncLLMEngine errors and stops serving requests
     "VLLM_KEEP_ALIVE_ON_ENGINE_DEATH":