mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-27 06:52:32 +08:00
[Model] IBM Granite 3.1 (#11307)
Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
This commit is contained in:
parent
5a9da2e6e9
commit
17ca964273
@ -194,8 +194,8 @@ Text Generation (``--task generate``)
|
||||
-
|
||||
- ✅︎
|
||||
* - :code:`GraniteForCausalLM`
|
||||
- Granite 3.0, PowerLM
|
||||
- :code:`ibm-granite/granite-3.0-2b-base`, :code:`ibm-granite/granite-3.0-8b-instruct`, :code:`ibm/PowerLM-3b`, etc.
|
||||
- Granite 3.0, Granite 3.1, PowerLM
|
||||
- :code:`ibm-granite/granite-3.0-2b-base`, :code:`ibm-granite/granite-3.1-8b-instruct`, :code:`ibm/PowerLM-3b`, etc.
|
||||
- ✅︎
|
||||
- ✅︎
|
||||
* - :code:`GraniteMoeForCausalLM`
|
||||
|
||||
@ -170,6 +170,12 @@ Recommended flags: `--tool-call-parser granite --chat-template examples/tool_cha
|
||||
|
||||
`examples/tool_chat_template_granite.jinja`: this is a modified chat template from the original on Huggingface. Parallel function calls are supported.
|
||||
|
||||
* `ibm-granite/granite-3.1-8b-instruct`
|
||||
|
||||
Recommended flags: `--tool-call-parser granite`
|
||||
|
||||
The chat template from Huggingface can be used directly. Parallel function calls are supported.
|
||||
|
||||
* `ibm-granite/granite-20b-functioncalling`
|
||||
|
||||
Recommended flags: `--tool-call-parser granite-20b-fc --chat-template examples/tool_chat_template_granite_20b_fc.jinja`
|
||||
@ -284,4 +290,3 @@ Then you can use this plugin in the command line like this.
|
||||
--tool-call-parser example \
|
||||
--chat-template <your chat template> \
|
||||
```
|
||||
|
||||
|
||||
@ -103,7 +103,7 @@ CONFIGS: Dict[str, ServerConfig] = {
|
||||
"supports_rocm":
|
||||
False,
|
||||
},
|
||||
"granite8b": {
|
||||
"granite-3.0-8b": {
|
||||
"model":
|
||||
"ibm-granite/granite-3.0-8b-instruct",
|
||||
"arguments": [
|
||||
@ -111,6 +111,14 @@ CONFIGS: Dict[str, ServerConfig] = {
|
||||
str(VLLM_PATH / "examples/tool_chat_template_granite.jinja")
|
||||
],
|
||||
},
|
||||
"granite-3.1-8b": {
|
||||
"model": "ibm-granite/granite-3.1-8b-instruct",
|
||||
"arguments": [
|
||||
"--tool-call-parser",
|
||||
"granite",
|
||||
],
|
||||
"supports_parallel": True,
|
||||
},
|
||||
"internlm": {
|
||||
"model":
|
||||
"internlm/internlm2_5-7b-chat",
|
||||
|
||||
@ -35,13 +35,18 @@ class GraniteToolParser(ToolParser):
|
||||
|
||||
def __init__(self, tokenizer: AnyTokenizer):
|
||||
super().__init__(tokenizer)
|
||||
# for granite 3.0, the token `<|tool_call|>`
|
||||
self.bot_token = "<|tool_call|>"
|
||||
# for granite 3.1, the string `<tool_call>`
|
||||
self.bot_string = "<tool_call>"
|
||||
|
||||
def extract_tool_calls(
|
||||
self, model_output: str,
|
||||
request: ChatCompletionRequest) -> ExtractedToolCallInformation:
|
||||
# remove whitespace and the BOT token if it exists
|
||||
stripped = model_output.strip().removeprefix(self.bot_token).lstrip()
|
||||
stripped = model_output.strip()\
|
||||
.removeprefix(self.bot_token)\
|
||||
.removeprefix(self.bot_string)\
|
||||
.lstrip()
|
||||
if not stripped or stripped[0] != '[':
|
||||
return ExtractedToolCallInformation(tools_called=False,
|
||||
tool_calls=[],
|
||||
@ -91,6 +96,9 @@ class GraniteToolParser(ToolParser):
|
||||
if current_text[start_idx:].startswith(self.bot_token):
|
||||
start_idx = consume_space(start_idx + len(self.bot_token),
|
||||
current_text)
|
||||
if current_text[start_idx:].startswith(self.bot_string):
|
||||
start_idx = consume_space(start_idx + len(self.bot_string),
|
||||
current_text)
|
||||
if not current_text or start_idx >= len(current_text)\
|
||||
or current_text[start_idx] != '[':
|
||||
return DeltaMessage(content=delta_text)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user