mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 21:55:50 +08:00
Updating lm-format-enforcer version and adding links to decoding libraries in docs (#4222)
This commit is contained in:
parent
91528575ec
commit
cc74b2b232
@@ -12,7 +12,7 @@ uvicorn[standard]
|
||||
pydantic >= 2.0 # Required for OpenAI server.
|
||||
prometheus_client >= 0.18.0
|
||||
tiktoken == 0.6.0 # Required for DBRX tokenizer
|
||||
-lm-format-enforcer == 0.9.3
|
||||
+lm-format-enforcer == 0.9.8
|
||||
outlines == 0.0.34 # Requires torch >= 2.1.0
|
||||
typing_extensions
|
||||
filelock >= 3.10.4 # filelock starts to support `mode` argument from 3.10.4
|
||||
|
||||
@@ -197,7 +197,11 @@ class EngineArgs:
|
||||
default='outlines',
|
||||
choices=['outlines', 'lm-format-enforcer'],
|
||||
help='Which engine will be used for guided decoding'
|
||||
-' (JSON schema / regex etc).')
|
||||
+' (JSON schema / regex etc) by default. Currently support '
|
||||
+'https://github.com/outlines-dev/outlines and '
|
||||
+'https://github.com/noamgat/lm-format-enforcer.'
|
||||
+' Can be overridden per request via guided_decoding_backend'
|
||||
+' parameter.')
|
||||
# Parallel arguments
|
||||
parser.add_argument('--worker-use-ray',
|
||||
action='store_true',
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
-from typing import Any, Dict, List, Optional
|
||||
+from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from torch.nn import Module
|
||||
@@ -114,7 +114,7 @@ class Fp8LinearMethod(LinearMethodBase):
|
||||
return output
|
||||
|
||||
|
||||
-def per_tensor_quantize(tensor: torch.Tensor) -> tuple[torch.Tensor, float]:
|
||||
+def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]:
|
||||
"""Quantize a tensor using per-tensor static scaling factor.
|
||||
|
||||
Args:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user