mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-08 06:09:10 +08:00
[Bugfix] Update Florence-2 tokenizer to make grounding tasks work (#16734)
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
parent
95aca283b4
commit
cb072ce93b
@ -22,7 +22,7 @@ class ModelRequestData(NamedTuple):
|
|||||||
def run_florence2():
|
def run_florence2():
|
||||||
engine_args = EngineArgs(
|
engine_args = EngineArgs(
|
||||||
model="microsoft/Florence-2-large",
|
model="microsoft/Florence-2-large",
|
||||||
tokenizer="facebook/bart-large",
|
tokenizer="Isotr0py/Florence-2-tokenizer",
|
||||||
max_num_seqs=8,
|
max_num_seqs=8,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
limit_mm_per_prompt={"image": 1},
|
limit_mm_per_prompt={"image": 1},
|
||||||
@ -165,6 +165,7 @@ def main(args):
|
|||||||
temperature=0,
|
temperature=0,
|
||||||
top_p=1.0,
|
top_p=1.0,
|
||||||
max_tokens=64,
|
max_tokens=64,
|
||||||
|
skip_special_tokens=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|||||||
@ -150,7 +150,7 @@ def run_florence2(questions: list[str], modality: str) -> ModelRequestData:
|
|||||||
|
|
||||||
engine_args = EngineArgs(
|
engine_args = EngineArgs(
|
||||||
model="microsoft/Florence-2-large",
|
model="microsoft/Florence-2-large",
|
||||||
tokenizer="facebook/bart-large",
|
tokenizer="Isotr0py/Florence-2-tokenizer",
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
|
|||||||
@ -925,6 +925,7 @@ class VllmRunner:
|
|||||||
max_tokens: int,
|
max_tokens: int,
|
||||||
num_logprobs: int,
|
num_logprobs: int,
|
||||||
num_prompt_logprobs: Optional[int] = None,
|
num_prompt_logprobs: Optional[int] = None,
|
||||||
|
skip_special_tokens: bool = True,
|
||||||
) -> Union[list[TokensTextLogprobs],
|
) -> Union[list[TokensTextLogprobs],
|
||||||
list[TokensTextLogprobsPromptLogprobs]]:
|
list[TokensTextLogprobsPromptLogprobs]]:
|
||||||
greedy_logprobs_params = SamplingParams(
|
greedy_logprobs_params = SamplingParams(
|
||||||
@ -932,6 +933,7 @@ class VllmRunner:
|
|||||||
max_tokens=max_tokens,
|
max_tokens=max_tokens,
|
||||||
logprobs=num_logprobs,
|
logprobs=num_logprobs,
|
||||||
prompt_logprobs=(num_prompt_logprobs),
|
prompt_logprobs=(num_prompt_logprobs),
|
||||||
|
skip_special_tokens=skip_special_tokens,
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
Greedy logprobs generation for vLLM encoder/decoder models
|
Greedy logprobs generation for vLLM encoder/decoder models
|
||||||
|
|||||||
@ -13,12 +13,12 @@ from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
|
|||||||
from ...utils import check_logprobs_close
|
from ...utils import check_logprobs_close
|
||||||
|
|
||||||
MODELS = ["microsoft/Florence-2-base"]
|
MODELS = ["microsoft/Florence-2-base"]
|
||||||
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
|
# Florence-2 model repo's tokenizer config is missing some special tokens.
|
||||||
# Therefore, we borrow the BartTokenizer from the original Bart model
|
# Therefore, we use a converted tokenizer from a forked repo
|
||||||
TOKENIZER = "facebook/bart-base"
|
TOKENIZER = "Isotr0py/Florence-2-tokenizer"
|
||||||
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
|
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
|
||||||
"stop_sign":
|
"stop_sign":
|
||||||
"<CAPTION>", # special task token
|
"<OD>", # special task token which will output special tokens
|
||||||
"cherry_blossom":
|
"cherry_blossom":
|
||||||
"Describe in detail what is shown in the image.",
|
"Describe in detail what is shown in the image.",
|
||||||
})
|
})
|
||||||
@ -45,7 +45,6 @@ def hf_to_vllm_output(hf_output: tuple[list[int], str,
|
|||||||
output_ids, output_str, out_logprobs = hf_output
|
output_ids, output_str, out_logprobs = hf_output
|
||||||
|
|
||||||
output_str = output_str.replace("</s>", "").replace("<s>", "")
|
output_str = output_str.replace("</s>", "").replace("<s>", "")
|
||||||
output_ids = [ids for ids in output_ids if ids not in [0, 2]]
|
|
||||||
|
|
||||||
return output_ids, output_str, out_logprobs
|
return output_ids, output_str, out_logprobs
|
||||||
|
|
||||||
@ -71,8 +70,11 @@ def run_test(
|
|||||||
enforce_eager=True) as vllm_model:
|
enforce_eager=True) as vllm_model:
|
||||||
vllm_outputs_per_case = [
|
vllm_outputs_per_case = [
|
||||||
vllm_model.generate_encoder_decoder_greedy_logprobs(
|
vllm_model.generate_encoder_decoder_greedy_logprobs(
|
||||||
prompts, max_tokens, num_logprobs=num_logprobs)
|
prompts,
|
||||||
for prompts in inputs
|
max_tokens,
|
||||||
|
num_logprobs=num_logprobs,
|
||||||
|
skip_special_tokens=False,
|
||||||
|
) for prompts in inputs
|
||||||
]
|
]
|
||||||
|
|
||||||
hf_inputs = [get_hf_images_prompts(prompts) for prompts in inputs]
|
hf_inputs = [get_hf_images_prompts(prompts) for prompts in inputs]
|
||||||
@ -93,6 +95,7 @@ def run_test(
|
|||||||
outputs_1_lst=vllm_outputs,
|
outputs_1_lst=vllm_outputs,
|
||||||
name_0="hf",
|
name_0="hf",
|
||||||
name_1="vllm",
|
name_1="vllm",
|
||||||
|
num_outputs_0_skip_tokens=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -366,7 +366,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
|
|||||||
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
|
# Florence-2 model repo's tokenizer config is missing some special tokens.
|
||||||
# Therefore, we borrow the BartTokenizer from the original Bart model
|
# Therefore, we use a converted tokenizer from a forked repo
|
||||||
"Florence2ForConditionalGeneration": _HfExamplesInfo("microsoft/Florence-2-base", # noqa: E501
|
"Florence2ForConditionalGeneration": _HfExamplesInfo("microsoft/Florence-2-base", # noqa: E501
|
||||||
tokenizer="facebook/bart-base",
|
tokenizer="Isotr0py/Florence-2-tokenizer",
|
||||||
trust_remote_code=True), # noqa: E501
|
trust_remote_code=True), # noqa: E501
|
||||||
"MllamaForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-3.2-11B-Vision-Instruct"), # noqa: E501
|
"MllamaForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-3.2-11B-Vision-Instruct"), # noqa: E501
|
||||||
"WhisperForConditionalGeneration": _HfExamplesInfo("openai/whisper-large-v3"), # noqa: E501
|
"WhisperForConditionalGeneration": _HfExamplesInfo("openai/whisper-large-v3"), # noqa: E501
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user