mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-19 06:35:01 +08:00
[Kernel] changing fused moe kernel chunk size default to 32k (#7995)
This commit is contained in:
parent
80c7b089b1
commit
34a0e96d46
@ -352,7 +352,7 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
|||||||
os.path.join(get_default_cache_root(), "vllm", "xla_cache"),
|
os.path.join(get_default_cache_root(), "vllm", "xla_cache"),
|
||||||
)),
|
)),
|
||||||
"VLLM_FUSED_MOE_CHUNK_SIZE":
|
"VLLM_FUSED_MOE_CHUNK_SIZE":
|
||||||
lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "65536")),
|
lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "32768")),
|
||||||
|
|
||||||
# If set, vllm will skip the deprecation warnings.
|
# If set, vllm will skip the deprecation warnings.
|
||||||
"VLLM_NO_DEPRECATION_WARNING":
|
"VLLM_NO_DEPRECATION_WARNING":
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user