diff --git a/tests/models/multimodal/pooling/test_radio.py b/tests/models/multimodal/pooling/test_radio.py index 8929563d8b050..1f5baed83fa62 100644 --- a/tests/models/multimodal/pooling/test_radio.py +++ b/tests/models/multimodal/pooling/test_radio.py @@ -42,6 +42,10 @@ def run_radio_test( config = AutoConfig.from_pretrained(model_id, trust_remote_code=True) + # The RADIO model on HF does not properly handle the torch_dtype argument + # and relies on args["dtype"] instead, which we have to patch manually: + config.args["dtype"] = torch_dtype + hf_model = AutoModel.from_pretrained( model_id, config=config, @@ -50,6 +54,13 @@ def run_radio_test( ).to("cuda") hf_model.eval() + # The HF model applies image normalization as part of its forward pass. + # In vLLM, however, normalization is not part of the model's forward + # step, since mean/std are stored as model parameters and are + # subject to precision loss (when using fp16/bf16), which negatively + # affects evaluation benchmarks. + hf_model.make_preprocessor_external() + hf_outputs_per_image = [ hf_model(pixel_value.to("cuda")).features for pixel_value in pixel_values ] @@ -78,7 +89,7 @@ def run_radio_test( "nvidia/C-RADIOv2-H", ], ) -@pytest.mark.parametrize("dtype", ["half"]) +@pytest.mark.parametrize("dtype", ["half", "bfloat16"]) def test_radio(dist_init, image_assets, model_id, dtype: str) -> None: run_radio_test( image_assets,